{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 3198/3198 [00:12<00:00, 264.62it/s]\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import nltk\n",
    "import re\n",
    "\n",
    "from collections import Counter\n",
    "\n",
    "from tqdm import tqdm\n",
    "\n",
    "# Root of the CNN question files; set CNN_QUESTIONS_DIR to point at another copy.\n",
    "root_dir = os.environ.get(\"CNN_QUESTIONS_DIR\", \"/Users/minjoons/data/cnn/questions\")\n",
    "data_dir = os.path.join(root_dir, \"test\")\n",
    "\n",
    "# Vocabulary counters, filled from both paragraph and question tokens.\n",
    "char_counter = Counter()  # characters of tokens that do not start with \"@\"\n",
    "word_counter = Counter()  # tokens that do not start with \"@\"\n",
    "ent_counter = Counter()   # tokens that start with \"@\"\n",
    "\n",
    "# Raw statistics collected while scanning the files.\n",
    "nums_words = []           # tokens per paragraph (one entry per file)\n",
    "nums_ques_words = []      # tokens per question (one entry per file)\n",
    "nums_sents = []           # sentences per paragraph (one entry per file)\n",
    "nums_words_per_sent = []  # tokens per sentence (one entry per sentence)\n",
    "nums_chars = []           # characters per paragraph token (one entry per token)\n",
    "nums_entities = []        # \"@\"-prefixed tokens per paragraph (one entry per file)\n",
    "\n",
    "sent_tokenize = nltk.sent_tokenize\n",
    "\n",
    "# List the directory once and keep only question files, so the progress-bar total\n",
    "# matches the number of files actually processed.\n",
    "question_paths = sorted(p for p in os.listdir(data_dir) if p.endswith(\".question\"))\n",
    "num_ques = len(question_paths)\n",
    "\n",
    "cand_set = set()\n",
    "\n",
    "for path in tqdm(question_paths, total=num_ques):\n",
    "    with open(os.path.join(data_dir, path), 'r', encoding='utf-8') as fh:\n",
    "        # Layout read here: url, paragraph, question and answer, each followed by one\n",
    "        # skipped line; every remaining line is one \"entity:name\" candidate.\n",
    "        url = fh.readline().strip()\n",
    "        _ = fh.readline()\n",
    "        para = fh.readline().strip()\n",
    "        _ = fh.readline()\n",
    "        ques = fh.readline().strip()\n",
    "        _ = fh.readline()\n",
    "        answer = fh.readline().strip()\n",
    "        _ = fh.readline()\n",
    "        # Drop blank lines: they carry no \":\" and would break the pair split below.\n",
    "        cands = [line.strip() for line in fh if line.strip()]\n",
    "\n",
    "    # Split on the first \":\" only so a name that itself contains \":\" stays intact.\n",
    "    # A candidate without any \":\" still fails loudly (IndexError) instead of being skipped.\n",
    "    cand_pairs = [cand.split(\":\", 1) for cand in cands]\n",
    "    cand_ents = tuple(pair[0] for pair in cand_pairs)\n",
    "    cand_names = tuple(pair[1] for pair in cand_pairs)\n",
    "    cand_set.update(cand_names)  # in-place update; no new set per file\n",
    "\n",
    "    words = para.split(\" \")\n",
    "    sents = sent_tokenize(para)\n",
    "    wordss = [sent.split(\" \") for sent in sents]\n",
    "    ques_words = ques.split(\" \")\n",
    "    ents = [word for word in words if word.startswith(\"@\")]\n",
    "\n",
    "    nums_entities.append(len(ents))\n",
    "    nums_words.append(len(words))\n",
    "    nums_ques_words.append(len(ques_words))\n",
    "    nums_sents.append(len(sents))\n",
    "    nums_words_per_sent.extend(map(len, wordss))\n",
    "    nums_chars.extend(map(len, words))\n",
    "\n",
    "    # Question tokens and paragraph tokens feed the same three counters.\n",
    "    for word in ques_words + words:\n",
    "        if word.startswith(\"@\"):\n",
    "            ent_counter[word] += 1\n",
    "        else:\n",
    "            word_counter[word] += 1\n",
    "            char_counter.update(word)  # counts each character of the token\n",
    "\n",
    "# Summary maxima over everything scanned; 0 when no question file was found.\n",
    "max_num_words = max(nums_words, default=0)\n",
    "max_num_ques_words = max(nums_ques_words, default=0)\n",
    "max_num_sents = max(nums_sents, default=0)\n",
    "max_num_words_per_sent = max(nums_words_per_sent, default=0)\n",
    "max_num_chars = max(nums_chars, default=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(22747, 465, 77, 12465)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sizes of the word, entity and character vocabularies and of the candidate-name set.\n",
    "tuple(len(coll) for coll in (word_counter, ent_counter, char_counter, cand_set))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1989, 37, 122, 443, 24)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the five max_* summary variables together as one tuple.\n",
    "(\n",
    "    max_num_words,\n",
    "    max_num_ques_words,\n",
    "    max_num_sents,\n",
    "    max_num_words_per_sent,\n",
    "    max_num_chars,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['hello', ' Wow', ' Hmm', '']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import re\n",
    "# Scratch check: the regex splitter keeps leading spaces and yields a trailing empty string.\n",
    "re.compile(\"[.!?]\").split(\"hello. Wow! Hmm?\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEACAYAAABfxaZOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAG9NJREFUeJzt3Xt4VNW5x/HvC6IckXoBi8hVEYt4KRZFVJC0iEZqC2Kr\nWErFpxaeg1zaqhVO29PYxxs9RytqvWAjCmqRgop6sCDoWFA01IIgEC4ql0RAwEq5ySWs88caYQyB\nTMjMrJnZv8/z7CczOzuTN5vJm8W71n63OecQEZFoqBM6ABERyRwlfRGRCFHSFxGJECV9EZEIUdIX\nEYkQJX0RkQipNumbWbGZrTezBYc45gEzW25m882sQ2pDFBGRVElmpD8WuPxgnzSzK4A2zrm2wCDg\n0RTFJiIiKVZt0nfOzQb+dYhDegHj4se+CxxrZk1SE56IiKRSKmr6zYA1Cc/L4/tERCTLaCJXRCRC\njkjBa5QDLRKeN4/vO4CZqdGPiMhhcM5ZKl4n2aRv8a0qLwE3Ac+ZWWfgc+fc+oO9kBq8pU5RURFF\nRUWhw8gbOp+pk6vn0jnYtg327IGKiv0ft2+H1ath5UpYtQo2bYLNm2HrVti1y287d/qPX3zhP79x\nI9SpAyeeuH9r0gTOPBPOOcdvTZqAJZHKLZmDklRt0jezZ4ECoJGZrQZ+BxwJOOfcGOfcVDPraWYr\ngG3ADSmLTkQkxZzziXnLFp+0t23z2/r1cPvtsGgR1K8PRxwBdev6rX59aNUKWreGli3hG9+AY4+F\nY46Bo46CI4/86sdGjaBxYzj66NA/7YGqTfrOuR8lccyQ1IQjIpIamzfDm2/CwoX+4+LFPslv3Qr1\n6vmE/eV29NHQoAEMHQrXX+9H6PkqFTV9CaSgoCB0CHlF5zN10n0uN2+Gn//cl1B27vTbF1/sf7xz\nJ2zYAJ07Q8eO0K8fdOu2f3Rer15aw8tqlskau5k51fRFpCacg/JyGDcOSkp8+WXFCvjOd2D4cF9O\nSdzq1/cfmzXzj/OBmaVsIldJX0SyUnk5jBgBr7ziJ1SvvBKuuspPgJ56qq+fR0Uqk77KOyISzPbt\nfhJ11SpYutSP4P/5T1+2WbgQBgzwtfiTTkpulYtUTyN9EcmYNWtg0iQoLYXly2HOHF9jb9rUL2Vs\n3RrOPdc/b9fOL3MUlXdEJIs5B599BmVl+7c1a/zHGTN8Lb5zZ5/gL7nEJ305NCV9EckqL78MDz4I\nH3/sk3v9+tC8ObRo4T9+uZ1xBlx4Yehoc49q+iIShHOwbJmvwX85gv/4Y3jxRSgu9iWa5s01es9m\nGumLSFLeegvuv9/X4du12z96b9HCj97POSd0hPlLI30RSYvXXoOZM+HTT+Hf/96/bdjg6/SDB8NT\nT2VnewFJjkb6IhG2cye88QZ88IFfTfPCC/6Cp5NO8levNmwIX/ua31q29M8l8zSRKyKHrbgYpk/3\nbQsWLfLLIi+8ENq0gYICX5eX7KLyjogkzTk/6TpzJrz3nh/N/+EPvkTTtCl06qQLn6JESV8kjzjn\nV9N89plfYfPMM75O37AhnHceXHyxf37GGaEjlVBU3hHJA5s3w9Sp8Nhjvp3BySf7G3T06eP71TRq\nFDpCqQ3V9EVknzvugP/9X3916xVXwMCB/sYfkj9U0xeJuK1b4f33YdYsv3Z+0SLfSlikOnl8fxiR\n/LJ7t18v/9vf+nLNL37h79kaiynhS/I00hfJYp9/Dn/9K4wfv78jZdeusGSJ7ykvUlOq6YtkmbIy\nv+pmzBhYt87X6fv39x+jdOMQ2U81fZE8NWMG9O0LPXv6vvNnnw1H6LdUUkhvJ5HAnPNdKkeN8l0r\nJ070PedF0kETuSKBTZgAt97qSzirVyvhS3pppC8SyI4dvnY/cqS/CUnnzqEjkihQ0hcJoKwMrrwS\njjsOpkxRwpfMUXlHJEOc86txnnvO98
ApKPBtjS+6KHRkEiUa6Yuk2a5dMGSIX2vfsCG0bw9PPAHd\nu4eOTKJISV8kjUpKfC+cU06BtWt9OUckJJV3RNLkxRf9evvhw+H555XwJTvoilyRNFi92t+c5Mkn\nobAwdDSS69RaWSSL7d4NP/mJL+ncdVfoaCQfpDLpq7wjkkKzZ/tGaMuW+bKOSLZR0hdJgbVrfV/7\nPn3gvvv8vWibNAkdlciBtHpHpJbWrIHzz/fr7qdNg3PPDR2RyMGppi9SC875lsddusBvfhM6GslX\nqumLZIniYvjkE7jtttCRiCQnqaRvZoVmVmpmy8zsgLe3mTUys1fNbL6ZLTSzASmPVCTLvPMO/PrX\n8PjjUK9e6GhEklNtecfM6gDLgO7AJ8BcoK9zrjThmN8B9Z1zI82sMbAUaOKc21PptVTekZy3YYOf\nsP3wQ3j4YejdO3REku8yXd7pBCx3zq1yzu0GJgC9Kh2zDmgYf9wQ2FQ54YvkOuegtBR++ENo29bf\nlFwJX3JNMkm/GbAm4XlZfF+ix4EzzewT4H1AK5Qlb+zaBYMHw8knQ48ecMklvqSj+9VKLkrVks2R\nwPvOuW+bWRvgNTM7xzm3tfKBRUVF+x4XFBRQUFCQohBEUs85+OUv/ah+zhxo1QosJf/JFjm4WCxG\nLBZLy2snU9PvDBQ55wrjz0cAzjk3KuGYqcCdzrm34s9nArc55/5R6bVU05ecMmqUb4n8+uvw9a+H\njkaiKpU1/WRG+nOB08ysFbAW6AtcV+mYJcClwFtm1gQ4HfgoFQGKhLBqle95/9hjfpWOEr7ki2pr\n+s65CmAIMB1YBExwzi0xs0FmNjB+2N3AeWb2PvAa8Cvn3GfpCloknd56y19hu2kTxGLQunXoiERS\nR1fkiiRYvNhfXfvYY36Vjkg2UGtlkTTYsgWuvtqvzlFLBckmSvoiadCjBzRuDGPHQv36oaMR2S/T\nE7kiee/++2HBAn/Hq6OOCh2NSPoo6Uvkvf463HEHvP22Er7kP3XZlMjats33zrn2WvjLX+D000NH\nJJJ+SvoSOc7BlClw1lnwf/8H06f7er5IFKi8I5FSXg79+8PGjXDPPXDNNWqrINGikb5EgnMwZIgv\n4XTqBPPm+bKOEr5EjUb6EgkPPAAlJbBiBTRtGjoakXC0Tl/y3rp1cOaZMHcunHpq6GhEak73yBVJ\nUmkpXHYZ/PjHSvgioKQveezRR6FjR7jxRn/xlYiopi956r774MEH/VW2bdqEjkYke6imL3ln2jQY\nMMBP3LZoEToakdpTTV+kCs75G59ceSU89JASvkhVVN6RvLByJfzsZ/Dpp76Hzvnnh45IJDsp6UvO\nW7UKvv99uOACeOUVNU0TORSVdySnrVwJHTpAnz7+bldK+CKHpolcyWl33AEffeRr+SL5SjdREQFm\nzfLr7+fMCR2JSO5QeUdy0u23+/vZPvQQtG0bOhqR3KHyjuSct96Cyy/3vXTOOCN0NCLpp3X6EknO\nwW9+A717w+TJSvgih0M1fckJFRXwpz/B+PF+pK9bG4ocHiV9yXrbt8PFF/sbnjz7rBK+SG0o6UtW\n27QJvvtdOOccePJJ3elKpLY0kStZq7zct0XeuxemToW6dUNHJBKGJnIlb+3Y4ev2ffr4u121bAnP\nPKOEL5IqGulLVrn+en+3q4EDfT+dE08MHZFIeLoiV/LOtm3w3/8NM2b4G580ahQ6IpH8pKQvWeGW\nW2DRIn8DFCV8kfRR0pfgZs+GiRPhgw+gadPQ0YjkN03kSlCbNsGgQTB6tBK+SCZoIleC2bMHunb1\nF1uNHQt1NAQRqZKWbEpe+OMfoUEDJXyRTErqV83MCs2s1MyWmdltBzmmwMzmmdkHZvZGasOUfDN9\nOvzhD/DII0r4IplUbXnHzOoAy4DuwCfAXKCvc6404ZhjgbeBy5xz5WbW2Dm3sYrXUnlHWL0aevTw\nSb
9Xr9DRiGS/TJd3OgHLnXOrnHO7gQlA5V/VHwGTnXPlAFUlfBGAefP8PW379fMXX4lIZiWT9JsB\naxKel8X3JTodOMHM3jCzuWbWP1UBSn4ZORLuvttfiKXmaSKZl6p1+kcA3wK+AzQA5pjZHOfcihS9\nvuS4rVvhzjv9WvwXXwwdjUh0JZP0y4GWCc+bx/clKgM2Oue+AL4ws78D3wQOSPpFRUX7HhcUFFBQ\nUFCziCXnbN0KZ58Np53myzv164eOSCS7xWIxYrFYWl47mYncusBS/ETuWqAEuM45tyThmHbAg0Ah\ncBTwLnCtc25xpdfSRG7E7Nzp72fbrh08+mjoaERyU0YbrjnnKsxsCDAdPwdQ7JxbYmaD/KfdGOdc\nqZlNAxYAFcCYyglfomfPHhg8GE44AR5+OHQ0IgK6IlfS6Pe/h1df9WvyGzYMHY1I7lJrZclqmzf7\nFTrPPguvvaaEL5JNlPQl5QYPhs8/h1gMTj01dDQikkjlHUmpzz/3iX7RInXNFEkVNVyTrPT++/Dt\nb8OAAUr4ItlKSV9qbelS31Lhu9+FG26Ae+8NHZGIHIzKO1Jr/frBscf6ZP8f/xE6GpH8o9U7kjU2\nbvQrdN59VwlfJBeovCOHbe9e+MEPfEnnlFNCRyMiyVDSl8Myfz507gz16sFdd4WORkSSpaQvNbJ7\nN/zqV3DZZX49/rRpULdu6KhEJFmq6UuNzJgBkyf7bpnNKt9VQUSynkb6krQtW2DECPiv/1LCF8lV\nWrIpSXEOBg2CHTtg3Djd9Uokk7RkUzLu+edh5kyYM0cJXySXaaQv1frsM+jYER55BAoLQ0cjEj2p\nHOkr6csh7d3rWyycfjrcd1/oaESiSeUdyZi//AXWr4cXXggdiYikgkb6clA7dsCZZ8LYsdCtW+ho\nRKJLrZUl7WbNgt694YILlPBF8omSvhzgpZfgqqv8pG1xcehoRCSVVN6RA/z4x3DRRb7NgoiEp/KO\npM2uXfDOO3DhhaEjEZF0UNKXfcrKoGdPaNUKvvnN0NGISDoo6Qt798KECX7S9txzfefMOnpniOQl\nrdMXRo+GJ57wk7a64lYkv2kiN+LKyuD88+HVV6FDh9DRiEhVNJErKTN0KPz0p0r4IlGh8k6EjRoF\nJSUwfnzoSEQkU5T0I2jPHrjpJn8XrHffhWOOCR2RiGSKyjsR8/HH/sKr1avhvfegefPQEYlIJinp\nR8jixX5Z5qWXwiuvwHHHhY5IRDJNq3ci5NproVMnuPnm0JGISE1o9Y7UWHGx75w5YEDoSEQkJE3k\n5rmdO+F//gf++EeYPRsaNQodkYiEpKSfx5yDq6+GLVtg/nxo0SJ0RCISmpJ+HisuhjVrYO5cOPLI\n0NGISDZIqqZvZoVmVmpmy8zstkMcd76Z7TazPqkLUWpq2zb49a/9NnasEr6I7FftSN/M6gAPAd2B\nT4C5ZjbFOVdaxXH3ANPSEagk7+ab/fLM+fOhadPQ0YhINkmmvNMJWO6cWwVgZhOAXkBppeOGApOA\n81MaodRIWRlMnAjLl2vSVkQOlEx5pxmwJuF5WXzfPmZ2MtDbOfcIkJK1pHJ4Jkzwk7dK+CJSlVRN\n5N4PJNb6lfgDmDIFiorgb38LHYmIZKtkkn450DLhefP4vkTnARPMzIDGwBVmtts591LlFysqKtr3\nuKCggIKCghqGLFV58024/np4/HHo0iV0NCJSG7FYjFgslpbXrrYNg5nVBZbiJ3LXAiXAdc65JQc5\nfizwsnPu+So+pzYMaVBWBh07+iWaV14ZOhoRSbVUtmGodqTvnKswsyHAdPwcQLFzbomZDfKfdmMq\nf0kqApPkbNgAAwfCjTcq4YtI9dRwLcddc42/ifmYMfC1r4WORkTSIaMjfcle//iH76ezYgUcfXTo\naEQkF6jLZg675x5/1a0SvogkS+WdHLV4MXTtCqWlcOKJoaMRkXRS
P/2I27sXhg/3a/KV8EWkJpT0\nc8zq1dC/P+zYAYMGhY5GRHKNkn4O2bkTunWDE07wV92qe6aI1JRq+jnkZz+Df/0LJk0KHYmIZFIq\na/pK+jli1So46yxYuVLN1ESiRhO5EfPRR3DhhXD33Ur4IlI7GunngFtvhS++gAcfDB2JiISg8k6E\nlJZCp06wdKnugiUSVSrvRMSGDf4CrLvvVsIXkdTQSD9L7d3rV+s0aAAPPBA6GhEJSQ3X8tw778DI\nkX5d/ksH3IZGROTwqbyTZSoq4KqroHdvmDULGjcOHZGI5BON9LPM00/DKafAsGFgutOwiKSYavpZ\npKIC2rXztz285JLQ0YhIttDqnTz16KO+a2bXrqEjEZF8pZF+lli3Dtq3h7ff9qN9EZEvaaSfZ8aP\nhzPOgMGDlfBFJL000g+sogI6doRRo+Dyy0NHIyLZSCP9PHLvvXD88dCjR+hIRCQKNNIPaNs2aN4c\nSkqgbdvQ0YhIttJIP0+8+qqv4Svhi0im6OKsQD74wE/c/vnPoSMRkShReSeQAQOgVSu4/fbQkYhI\ntlPDtRw3bRq8/jq8917oSEQkalTTz7CNG+GWW2D4cH/1rYhIJinpZ9izz8Jpp8HPfx46EhGJIiX9\nDNmzx/fWGT0aBg6EunVDRyQiUaSknyGTJsGf/gR33gmFhaGjEZGo0uqdDPne9+Caa6B//9CRiEiu\nSeXqHSX9DNi4Edq0gbIyaNgwdDQikmt0RW6OmTgRrrhCCV9EwtM6/TRbssR30CwuDh2JiIhG+mn1\n73/D1VfDL34Bl14aOhoRkSSTvpkVmlmpmS0zs9uq+PyPzOz9+DbbzM5Ofai5Zds26NnT98ofNix0\nNCIiXrUTuWZWB1gGdAc+AeYCfZ1zpQnHdAaWOOc2m1khUOSc61zFa0VmInfoUNi0yd8VS2vyRaQ2\nMt17pxOw3Dm3Kv7NJwC9gH1J3zn3TsLx7wDNUhFcrlqwAJ5/3nfSVMIXkWySTHmnGbAm4XkZh07q\nNwKv1iaoXPf003Dttf6OWCIi2SSlq3fM7NvADUCXgx1TVFS073FBQQEFBQWpDCG4efNg3Dj4299C\nRyIiuSoWixGLxdLy2snU9Dvja/SF8ecjAOecG1XpuHOAyUChc+7Dg7xWXtf0Kyrg1FNhxAj4z/8M\nHY2I5IuMXpFrZnWBpfiJ3LVACXCdc25JwjEtgZlA/0r1/cqvlddJf/BgWLgQZs0KHYmI5JOMTuQ6\n5yrMbAgwHT8HUOycW2Jmg/yn3Rjgt8AJwMNmZsBu51ynVASYK954A6ZPh/nzQ0ciInJw6r2TAh9/\nDN27w113Qd++oaMRkXyjhmtZZPduuOgiv1rnlltCRyMi+UgN17LIxInQoAHcfHPoSEREqqekX0ux\nGPzgB2Ap+RssIpJe6rJZC5Mnw1NPwaJFoSMREUmORvqHyTm491548klo2zZ0NCIiyVHSP0yTJsGO\nHX4CV0QkV6i8cxhWrPAXYk2apIZqIpJbNNI/DLNmQY8e0K1b6EhERGpGSf8wPPMMXH556ChERGpO\nF2fV0OrVcN55UF4O9eqFjkZEokAXZwX0wgtw8cVK+CKSmzTSr4Ht26FlS/j736F9+9DRiEhUaKQf\ngHNQVAQXXKCELyK5S0s2k+AcDBzo2yZPmRI6GhGRw6ekn4TnnoOSEnj7bd9cTUQkV6m8U429e+H+\n+/0tEJXwRSTXKelXY9w430FT7RZEJB+ovHMI69bBHXdAcTHU0Z9HEckDSmVVcA5mzoSzzoKrroKu\nXUNHJCKSGhrpV2H0aN82efx4uOKK0NGIiKSOkn6CNWtg4UJ44AF4+mk1VBOR/KOkH3fTTb6R2nnn\n+ceXXBI6IhGR1FPSx4/qZ8yA5cvhxBNDRyMikj6R772zcyd84xs+8XfpEjoaEZEDqfdOiuzZA7fc\nAh06KOGLSDREtrzz4YcwdCis
Xw+TJ4eORkQkMyI70h80CE4+GaZNg9atQ0cjIpIZkazpL1gA3bvD\nypXqpyMi2U81/VpwDu67D4YPV8IXkeiJ3Eh/4EB4802YPVvLM0UkN6RypB+pidy//hViMXjvPWjY\nMHQ0IiKZF4mkv22bT/i33govv6yELyLRldc1/e3b4ac/hebN4cknYfp06Nw5dFQiIuHk7Uh/+3bo\n1w+2bIFFi/zyTBGRqMvLpL9rF/Ts6W98MnUq1K8fOiIRkeyQVHnHzArNrNTMlpnZbQc55gEzW25m\n882sQ2rDTN7KldC+vR/pT5yohC8ikqjapG9mdYCHgMuBM4HrzKxdpWOuANo459oCg4BH0xBrtcrK\n4IILYNgwKCmBxo1DRJE5sVgsdAh5ReczdXQus1cyI/1OwHLn3Crn3G5gAtCr0jG9gHEAzrl3gWPN\nrElKIz2E9et9L/z27f1FV8OGZeo7h6VfrNTS+UwdncvslUxNvxmwJuF5Gf4PwaGOKY/vW1+r6KpQ\nUeGXYG7f7tfbjxzp73jVpQuMGQN9+6b6O4qI5I+cmMgdP96P4Ldvh9274eijfQuFNm18a+R+/aBu\n3dBRiohkv2rbMJhZZ6DIOVcYfz4CcM65UQnHPAq84Zx7Lv68FOjmnFtf6bXCd1sTEclBmWzDMBc4\nzcxaAWuBvsB1lY55CbgJeC7+R+LzygkfUhe0iIgcnmqTvnOuwsyGANPxE7/FzrklZjbIf9qNcc5N\nNbOeZrYC2AbckN6wRUTkcGS0y6aIiISVsd47yVzgJV9lZivN7H0zm2dmJfF9x5vZdDNbambTzOzY\nhONHxi+QW2Jml4WLPDuYWbGZrTezBQn7anz+zOxbZrYg/t69P9M/R7Y4yPn8nZmVmdk/41thwud0\nPg/CzJqb2etmtsjMFprZsPj+9L8/nXNp3/B/XFYArYB6wHygXSa+dy5vwEfA8ZX2jQJ+FX98G3BP\n/HF7YB6+ZNc6fr4t9M8Q+Px1AToAC2pz/oB3gfPjj6cCl4f+2bLofP4O+GUVx56h83nIc3kS0CH+\n+BhgKdAuE+/PTI30k7nASw5kHPi/sV7AU/HHTwG944+/D0xwzu1xzq0ElnPg9RSR4pybDfyr0u4a\nnT8zOwlo6JybGz9uXMLXRMpBzif492llvdD5PCjn3Drn3Pz4463AEqA5GXh/ZirpV3WBV7MMfe9c\n5oDXzGyumd0Y39fExVdGOefWAV+P7z/YBXLyVV+v4flrhn+/fknv3QMNiffc+nNCOULnM0lm1hr/\nP6h3qPnvd43PZ173088DFzvnvgX0BG4ys674PwSJNBNfOzp/tfMwcKpzrgOwDrg3cDw5xcyOASYB\nw+Mj/rT/fmcq6ZcDLROeN4/vk0Nwzq2Nf9wAvIgv16z/sq9R/L92n8YPLwdaJHy5znHVanr+dF4P\nwTm3wcWLycDj7C8p6nxWw8yOwCf88c65KfHdaX9/Zirp77vAy8yOxF/g9VKGvndOMrOj46MAzKwB\ncBmwEH/eBsQPux748s3yEtDXzI40s1OA04CSjAadnYyv1pxrdP7i/8XebGadzMyAnyR8TRR95XzG\nE9OX+gAfxB/rfFbvCWCxc250wr70vz8zOFtdiJ+hXg6MCD17nu0bcAp+ldM8fLIfEd9/AjAjfi6n\nA8clfM1I/Kz+EuCy0D9D6A14FvgE2Amsxl80eHxNzx/QMf5vsBwYHfrnyrLzOQ5YEH+vvoivSet8\nVn8uLwYqEn7H/xnPkTX+/a7p+dTFWSIiEaKJXBGRCFHSFxGJECV9EZEIUdIXEYkQJX0RkQhR0hcR\niRAlfRGRCFHSFxGJkP8HWh63bv3V45sAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10c247128>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "counter = Counter(nums_words)\n",
    "# Sort by paragraph length first: a cumulative curve is only meaningful over ordered\n",
    "# x values, and Counter does not yield its keys in numeric order.\n",
    "lengths = sorted(counter)\n",
    "values = [counter[length] for length in lengths]\n",
    "plt.plot(lengths, np.cumsum(values) / sum(values))\n",
    "plt.xlabel(\"words per paragraph\")\n",
    "plt.ylabel(\"cumulative fraction of paragraphs\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEACAYAAACznAEdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHDZJREFUeJzt3X2QVPWd7/H3BxCRBxGDDMqAoIjixtzIKl7Xq/ZGAz5U\nqZWtNSS5q66uWxtNtO5ucgP+I1ZlNVYlu2Zz1arcZBVz3bAkt6KkpBC5pK1KsioaVAIIKIIMyvgA\novi0IN/7xzkj7dA93TPTffr0zOdVNTVnfnMevnMY5tO/3++c04oIzMzMhjS7ADMzywcHgpmZAQ4E\nMzNLORDMzAxwIJiZWcqBYGZmQI2BIGmspF9I2iBpnaSzJI2TtELSRkmPShpbsv4CSZvT9eeUtM+S\n9LykTZLuasQPZGZmfVNrD+GHwLKImAn8F+AFYD6wMiJOBlYBCwAknQpcCcwELgbukaR0P/cC10XE\nDGCGpLl1+0nMzKxfqgaCpCOBcyPiPoCI2B8Re4DLgUXpaouAK9Lly4DF6Xpbgc3AbEkTgTERsTpd\n74GSbczMrMlq6SFMA96UdJ+kP0j6saSRQFtEdAJExE5gQrr+JGB7yfY70rZJQEdJe0faZmZmOVBL\nIAwDZgF3R8Qs4D2S4aLuz7zwMzDMzFrYsBrW6QC2R8TT6df/lyQQOiW1RURnOhz0evr9HcDkku3b\n07ZK7YeQ5HAxM+uDiFD1tcqr2kNIh4W2S5qRNl0ArAOWAtekbVcDD6fLS4F5koZLmgZMB55Kh5X2\nSJqdTjJfVbJNuePm6uPWW29teg2uaWDV5ZpcU70/+quWHgLATcCDkg4DtgB/DQwFlki6FthGcmUR\nEbFe0hJgPbAPuCEOVnojcD8wguSqpeX9/gnMzKwuagqEiHgOOLPMty6ssP4dwB1l2p8BTutNgWZm\nlg3fqVyjQqHQ7BIO4Zpql8e6XFNtXFN2VI9xp3qTFHmsy8wszyQRjZxUNjOzwcGBYGZmgAPBzMxS\nDgQzMwMcCGZmlnIgmJkZ4EAwM7OUA8HMzAAHgpmZpRwIZmYGOBDMzCzlQDAzM8CBYGZmKQeCmZkB\nDgQzM0s5EMzMDHAgmJlZyoFgZmaAA8HMzFIOBDMzAxwIZmaWciCYmRngQDAzs5QDwczMAAeCmZml\nHAhmZgbUGAiStkp6TtIaSU+lbeMkrZC0UdKjksaWrL9A0mZJGyTNKWmfJel5SZsk3VX/H8fMzPqq\n1h7CAaAQEadHxOy0bT6wMiJOBlYBCwAknQpcCcwELgbukaR0m3uB6yJiBjBD0tw6/RxmZtZPtQaC\nyqx7ObAoXV4EXJEuXwYsjoj9EbEV2AzMljQRGBMRq9P1HijZxszMmqzWQAjgMUmrJf1N2tYWEZ0A\nEbETmJC2TwK2l2y7I22bBHSUtHekbWZmlgPDalzvnIh4TdIxwApJG0lColT3r83MrIXUFAgR8Vr6\n+Q1JDwGzgU5JbRHRmQ4HvZ6uvgOYXLJ5e9pWqb2shQsXfrJcKBQoFAq1lGpmNmgUi0WKxWLd9qeI\nnl/YSxoJDImIvZJGASuA24ALgF0Rcaek7wDjImJ+Oqn8IHAWyZDQY8BJERGSngBuAlYDjwD/EhHL\nyxwzqtVlZmafJomIUPU1y6ulh9AG/EpSpOs/GBErJD0NLJF0LbCN5MoiImK9pCXAemAfcEPJX/cb\ngfuBEcCycmFgZmbNUbWH0AzuIZiZ9V5/ewi+U9nMzIAcB4I7CGZm2XIgmJkZkONAOHCg2RWYmQ0u\nuQ0E9xDMzLLlQDAzM8CBYGZmKQeCmZkBOQ4ETyqbmWUrt4HgHoKZWbYcCGZmBjgQzMwsldtA8ByC\nmVm2chsI7iGYmWXLgWBmZoADwczMUg4EMzMDchwInlQ2M8tWbgPBPQQzs2w5EMzMDHAgmJlZyoFg\nZmZAjgPBk8pmZtnKbSC4h2Bmli0Hgp
mZAQ4EMzNL5TYQPIdgZpat3AaCewhmZtlyIJiZGdCLQJA0\nRNIfJC1Nvx4naYWkjZIelTS2ZN0FkjZL2iBpTkn7LEnPS9ok6a6ejudAMDPLVm96CDcD60u+ng+s\njIiTgVXAAgBJpwJXAjOBi4F7JCnd5l7guoiYAcyQNLfSwRwIZmbZqikQJLUDlwA/KWm+HFiULi8C\nrkiXLwMWR8T+iNgKbAZmS5oIjImI1el6D5RscwhPKpuZZavWHsI/A98GSl+3t0VEJ0BE7AQmpO2T\ngO0l6+1I2yYBHSXtHWlbWe4hmJlla1i1FSRdCnRGxLOSCj2sWtc/4T/60ULGj0+WC4UChUJPhzYz\nG3yKxSLFYrFu+1NUeSku6XbgvwP7gSOAMcCvgDOAQkR0psNBv4mImZLmAxERd6bbLwduBbZ1rZO2\nzwPOj4ivlzlmrF8fzJxZ2w8RkXwMye01U2ZmjSeJiFD1Ncur+ic0Im6JiCkRcQIwD1gVEX8F/Bq4\nJl3tauDhdHkpME/ScEnTgOnAU+mw0h5Js9NJ5qtKtilz3Np/iEWL4Fvfqn19MzM7VNUhox58D1gi\n6VqSV/9XAkTEeklLSK5I2gfcEAe7ITcC9wMjgGURsbzSznszqdzRATt29OEnMDOzT/QqECLiceDx\ndHkXcGGF9e4A7ijT/gxwWm3Hqr2u3buTDzMz67vcjrr3NhDefrtxtZiZDQb9GTJqqFoDoVBIhpfc\nQzAz65+WDoSPPoLHH4dRo+CIIxpfk5nZQJbbQKhlUnl7evvbe+/Bhx8mIaI+X3BlZja4tfQcwtat\nB5c//hj27m1YOWZmA17LBsLSpXD99Qe/PvJITyybmfVHywbCU08lPYSpU2HoUJg82RPLZmb9kdtA\nqDaH8NJLyWTyF74A48YlH+4hmJn1XW4nlav1ELZsgUcegeOPT8JhzBjYtSub2szMBqLc9hCqBcJL\nL8EppyRDRsUifPaz8PTT8OCD8P3vZ1GhmdnA0pI9hD17kstMJ0w42HbJJXDzzfDcc3DYYY2vz8xs\noGnJQHj55aRnUHrPwdlnw6uvwvr1cFpNT0syM7NSuR0y6mlSeetWmDbt023DhsEvfgEXXwyvvNLQ\n0szMBqSW7CGUCwSAc8+Fc85JHmPx4YcwYkTDyjMzG3By20OoZcionCFDYNKk5D0SzMysdi0ZCJV6\nCF2mTPGwkZlZb7VkIPTUQwCYMQM2bKh7SWZmA1puA6GnSeXXXoPjjqv8/dNPhzVr6l+TmdlAlttA\nqNRD6HoznKOPrrytA8HMrPdaLhDefhtGj+755rPPfS4ZMtq3rzG1mZkNRC0XCG+9BePH97ztyJFw\nzDG+0sjMrDdyGwiV5hDefBM+85nq20+efPAd1czMrLrcBkJPPYRaAsGXnpqZ9U5LBkK1ISNwD8HM\nrLdaLhBqHTJyD8HMrHdaLhDcQzAza4zcBkKlSeU33qith3D88ckjLszMrDa5DYRKPYRNm+Ckk6pv\nf+KJybuqVXtvZjMzS1QNBEmHS3pS0hpJ6yTdnraPk7RC0kZJj0oaW7LNAkmbJW2QNKekfZak5yVt\nknRXT8ctFwgRsHZtbW+AM3p0cjezh43MzGpTNRAi4iPgzyPidOBzwBcknQPMB1ZGxMnAKmABgKRT\ngSuBmcDFwD3SJ+9tdi9wXUTMAGZImlv5uIe2vfpqcody6Vtn9mTGDNi8ubZ1zcwGu5qGjCLi/XTx\n8HSb3cDlwKK0fRFwRbp8GbA4IvZHxFZgMzBb0kRgTESsTtd7oGSbMsc8tG3tWvjsZ2upODFjRjLE\nZGZm1dUUCJKGSFoD7ASKEbEeaIuIToCI2Al0vW6fBJQO1OxI2yYBpQ+T6Ejbyio39v/yyzB9ei0V\nJ048EbZsqX19M7PBrKa30IyIA8Dpko4EHpVUALq/hu/hHQx679//fSF//GOyXCgUKBQKdHZCW1vt\n+x
g/3u+LYGYDV7FYpFgs1m1/vXpP5Yh4R9Iy4AygU1JbRHSmw0Gvp6vtACaXbNaetlVqL+sv/3Ih\nV1756badO5Mnmdbq6KNh167a1zczayVdL5a73Hbbbf3aXy1XGY3vuoJI0hHAF4E1wFLgmnS1q4GH\n0+WlwDxJwyVNA6YDT6XDSnskzU4nma8q2eYQ5eYQettDOPro5L0TzMysulp6CMcCi9I/4kOAn0XE\n/0vnFJZIuhbYRnJlERGxXtISYD2wD7gh4pM/7zcC9wMjgGURsbzSQSsFwsSJNf5kwLhx7iGYmdWq\naiBExFpgVpn2XcCFFba5A7ijTPszQA13EZSfVN65s/c9BAeCmVltWuZO5Yi+DRnt2lX5rmczMzuo\nZQJh797k8+jRte/jiCNAgg8+qF9dZmYDVcsEwmuvJfMHn9zzXKOjj4bf/rZ+dZmZDVS5DYTucwg7\ndkB7e+/3s3s3zJ0Le/bUpy4zs4Eqt4HQvYfQ0QGTKt7XXFnXcNHrr/e8npnZYNcygdDXHsKvfpW8\ne5oDwcysZy0TCH3tIVxxBZx+ugPBzKyalgmEvvYQIHlctgPBzKxnuQ2E7pPKfe0hgAPBzKwWuQ2E\n7j2Et95Knl7aFw4EM7PqWiYQPvgARo7s274cCGZm1bVMILz/fnLncV9MmJA89sLMzCprmUDoTw9h\nyhTYtq3/NZmZDWS5DYTSSeX9+5OP4cP7tq8pU+DVV2HfvvrUZmY2EOU2EEp7CF29g94+x6jL8OFw\n7LHwyiv1qc3MbCBqmUDo6/xBlxNPhGee8aOwzcwqGTSBMGYMfPnL8Nhj/duPmdlAldtAKJ1D6M+E\ncpdLL00+v/tu//ZjZjZQ1fKeyk1R2kPozyWnXa6/PnlfBAeCmVl5ue0h1HvICJJhIweCmVl5LRMI\n/R0yAgeCmVlPWiYQ3EMwM2us3AZC6aRyPeYQwIFgZtaT3AaCh4zMzLLVMoHgHoKZWWO1RCB4yMjM\nrPFaIhA8ZGRm1ni5DYTudyq7h2Bm1lhVA0FSu6RVktZJWivpprR9nKQVkjZKelTS2JJtFkjaLGmD\npDkl7bMkPS9pk6S7ejqu5xDMzLJVSw9hP/D3EfEnwNnAjZJOAeYDKyPiZGAVsABA0qnAlcBM4GLg\nHumTB1ffC1wXETOAGZLmVjpoI+YQRo92IJiZVVI1ECJiZ0Q8my7vBTYA7cDlwKJ0tUXAFenyZcDi\niNgfEVuBzcBsSROBMRGxOl3vgZJtyhz34PIHH8CoUbX/UJV09RD8CGwzs0P1ag5B0lTg88ATQFtE\ndEISGsCEdLVJwPaSzXakbZOAjpL2jrStrO49hHpMKg8fDuPG+f2VzczKqflpp5JGA78Ebo6IvZK6\nv86u6+vu3/xm4SfL27cXGDmyUJf9nnIKvPACTJxYl92ZmTVNsVikWCzWbX81BYKkYSRh8LOIeDht\n7pTUFhGd6XDQ62n7DmByyebtaVul9rLOO28hCxcmy48/Xp85BDgYCIVCffZnZtYshUKBQskfs9tu\nu61f+6t1yOhfgfUR8cOStqXANeny1cDDJe3zJA2XNA2YDjyVDivtkTQ7nWS+qmSbQzRiyAgOBoKZ\nmX1a1R6CpHOArwFrJa0hGRq6BbgTWCLpWmAbyZVFRMR6SUuA9cA+4IaIT/683wjcD4wAlkXE8krH\nbWQgrFxZn32ZmQ0kVQMhIn4HDK3w7QsrbHMHcEeZ9meA02oprPvTTusVCFOnwiuv1GdfZmYDSW7v\nVG5UD2HSJOjoqL6emdlgM+gCYexY+PhjeOed+uzPzGygGHSBIEF7O+yoeH2TmdnglPtA2Lcv+XzY\nYfXbd3u7h43MzLrLbSB0TSrX6zlGpTyPYGZ2qNwGQlcPoZ7DRV3a22H79urrmZkNJoMyEKZMcQ/B\nzKy7QRsIvhfBzOzTch8I9Xr7zFIOBDOzQ+U2EEonlesdCJMnJ4Hg
90UwMzsot4HQyCGjI4+EYcNg\n9+767tfMrJXlPhDee6/+l51CMmy0bVv992tm1qpyHwgdHcl9A/X2p38Kv/99/fdrZtaqchsIXXMI\nL78M06bVf/+XXALLltV/v2ZmrSq3gdDVQ3j5ZTjhhPrv/4tfhGLx04/ZNjMbzHIfCFu2NKaHcNRR\ncPjhnlg2M+uS60CIgK1bGxMIABMnws6djdm3mVmryXUg7NqVvIofM6Yxx5g4ETo7G7NvM7NWk9tA\nOHAguQdh1KjGHaOtzT0EM7MuuQ2ECPjoIxgxonHH8JCRmdlBuQ6EDz9MhowaxYFgZnZQ7gOhkT2E\ntjZYswbefbdxxzAzaxWDOhBOPBFWroQf/KBxxzAzaxW5DYQDB5I5hEYOGZ17Ltx9N7z2WuOOYWbW\nKnIbCFn0EACOO86BYGYGDgSOPdaBYGYGOQ+ERl92CkkP4dVXG3sMM7NWkOtAaPRlp5BcafT66/Dx\nx409jplZ3lUNBEk/ldQp6fmStnGSVkjaKOlRSWNLvrdA0mZJGyTNKWmfJel5SZsk3VXtuAcOZDNk\nNHw4jBsHb7zR2OOYmeVdLT2E+4C53drmAysj4mRgFbAAQNKpwJXATOBi4B5JSre5F7guImYAMyR1\n3+enZDWHAMmwUUdH449jZpZnVQMhIn4LdH9I9OXAonR5EXBFunwZsDgi9kfEVmAzMFvSRGBMRKxO\n13ugZJsKx238ZaddzjwTvvtdmD+/8ccyM8urvs4hTIiIToCI2AlMSNsnAdtL1tuRtk0CSl+Dd6Rt\nFWXZQ5g7Fx5+GO68s/HHMjPLq3pNKked9vOJrOYQAC68MDnO9OmNP5aZWV4N6+N2nZLaIqIzHQ56\nPW3fAUwuWa89bavUXtFDDy1kxAg4/3w466wChUKhj6VWd9RRsG4dXHBBww5hZlZ3xWKRYrFYt/0p\novqLe0lTgV9HxGnp13cCuyLiTknfAcZFxPx0UvlB4CySIaHHgJMiIiQ9AdwErAYeAf4lIpZXOF7c\ncktw++1w773wd3/X75+zqrfegpNOSt6Ux8ysFUkiIlR9zfKq9hAk/RtQAD4j6RXgVuB7wC8kXQts\nI7myiIhYL2kJsB7YB9wQBxPnRuB+YASwrFIYdDn++ORzFkNGAGPHwjvvJHMX6vPpNDNrXVUDISK+\nWuFbF1ZY/w7gjjLtzwCn1VrYhHSaOourjACGDUvCZ+/exr1lp5lZnuX2TuWuQMiqhwDJXMLbb2d3\nPDOzPHEglDjqKNizJ7vjmZnlSW4D4Zhjks9ZBsLYse4hmNngldtAOPLI5DlDWc0hgHsIZja45TYQ\npGTYyD0EM7Ns5DYQAG68EaZNy+54o0cnx3zzzeyOaWaWF7kOhPnzk0dTZ+X665Mnnz75ZHbHNDPL\ni1wHQtbOPDN5fMWLLza7EjOz7DkQupk+HTZvbnYVZmbZcyB0c9JJsHEjvPtusysxM8uWA6Gb6dNh\n5Uq45JJmV2Jmli0HQjcnngg//nHSS9iypdnVmJllp6bHX2dNUjS7rq9/PXniqt9W08xaRX8ff+0e\nQgWXXgqPPtrsKszMsuMeQgV798LEiTBrFjz+uN8jwczyzz2EBhk9Gv7pn+CFF+Dll5tdjZlZ4zkQ\nevC3fwuFAvz+982uxMys8RwIVfzZn8HvftfsKszMGs+BUMVZZ8Hq1c2uwsys8TypXMX778P48bB7\nd7bvzWBm1lueVG6wkSOTu5fXrm12JWZmjeVAqMGZZ8J//EezqzAzaywHQg0uuQR+9COYPRtyMpJl\nZlZ3nkOowXvvJfMIH34IGzbAKac0uyIzs0N5DiEDo0bBqlXw1a/C8uXNrsbMrDGGNbuAVnH22bBz\nJ3ztazB0KHzzm82uyMysvjxk1EtPPw1/8Rfw0kswzHFqZjniIaOMnXEGTJ0Kv/xlsysxM6uvzF/j\nSroIuIskjH4aEXdmXUN/ffvb
8A//kEwyH3EEfOlLcNhhza7KzKx/Mu0hSBoC/C9gLvAnwFcktcQ1\nO8Vi8ZPlSy9N5hCWL4e774bzzoMXX2xuTXmRx5ogn3W5ptq4puxkPWQ0G9gcEdsiYh+wGLg84xr6\npPQXQIJvfAMWL4ZiEebNS5559P3vw333JfcqRCSPzv7gg2xqyos81gT5rMs11cY1ZSfrIaNJwPaS\nrztIQqJlDRkCN98Mc+bAV76S3LOwcCEcd1zSa2hrS25su+AC2L8fvvzlZJ16ue22+u2rXvJYE8A/\n/mNyIcCQIZ/+GDo0+ejeVvq5EW3r1iXv212vfff3+0OHwqZNsGxZz+tJh57Drjbp4JtJlX7uT9vb\nb8O2bfXfb3/2sW9f8mKvv8fKG18nUyczZ8Kzz8K77x78T3XTTfDQQ8l/+u9+F/7zP5PPixfDk082\nu+LBZ//+5CNP8viMrJ//vNkVHOqHP2x2BYe6/fbmHbu9HbZvr75eb2V62amk/wosjIiL0q/nA9F9\nYllSPq85NTPLuf5cdpp1IAwFNgIXAK8BTwFfiYgNmRVhZmZlZTpkFBEfS/oGsIKDl506DMzMciCX\ndyqbmVn2cnWnsqSLJL0gaZOk7zSxjq2SnpO0RtJTads4SSskbZT0qKSxGdTxU0mdkp4vaatYh6QF\nkjZL2iBpToY13SqpQ9If0o+LMq6pXdIqSeskrZV0U9retHNVpqZvpu1NO1eSDpf0ZPp7vU7S7Wl7\nM89TpZqa+juVHmdIeuyl6ddN/b9XUtOakprqe54iIhcfJOH0InA8cBjwLHBKk2rZAozr1nYn8D/T\n5e8A38ugjv8GfB54vlodwKnAGpJhwKnpuVRGNd0K/H2ZdWdmVNNE4PPp8miSeapTmnmueqip2edq\nZPp5KPAEcE4OfqfK1dTU85Qe638A/wdYmn7d1PNUoaa6nqc89RDydNOaOLT3dDmwKF1eBFzR6CIi\n4rfA7hrruAxYHBH7I2IrsJkG3ONRoSZIzll3l2dU086IeDZd3gtsANpp4rmqUNOk9NvNPFfvp4uH\nk/yO76b5v1PlaoImnidJ7cAlwE+6Hbtp56lCTVDH85SnQCh309qkCus2WgCPSVot6W/StraI6ITk\nPzswoUm1TahQR/fzt4Nsz983JD0r6SclXenMa5I0laQH8wSV/80yraukpq67T5p2rrqGHICdQDEi\n1tPk81ShJmju79Q/A98m+VvQpdm/T+VqgjqepzwFQp6cExGzSNL4Rknncug/Ql5m4/NQxz3ACRHx\neZL/1D9oRhGSRgO/BG5OX5U3/d+sTE1NPVcRcSAiTifpQZ0rqUCTz1O3ms6TdD5NPE+SLgU60x5e\nT9f0Z3aeeqiprucpT4GwA5hS8nV72pa5iHgt/fwG8BBJV6tTUhuApInA682orYc6dgCTS9bL7PxF\nxBuRDlwC/5uDXdPMapI0jOQP788i4uG0uannqlxNeThXaR3vAMuAM8jJ71Ra0yPAGU0+T+cAl0na\nAvwc+IKknwE7m3ieytX0QL3PU54CYTUwXdLxkoYD84ClWRchaWT6qg5Jo4A5wNq0lmvS1a4GHi67\ngwaUxKdfEVSqYykwT9JwSdOA6SQ3/jW8pvQ/R5cvAX9sQk3/CqyPiNKHHDT7XB1SUzPPlaTxXUMK\nko4Avkgy8di081ShpmebeZ4i4paImBIRJ5D8HVoVEX8F/JomnacKNV1V9/PUiJnwvn4AF5FcjbEZ\nmN+kGqaRXOG0hiQI5qftRwMr0/pWAEdlUMu/Aa8CHwGvAH8NjKtUB7CA5GqCDcCcDGt6AHg+PW8P\nkYy1ZlnTOcDHJf9uf0h/lyr+mzW6rh5qatq5Ak5L61gDPAd8q9rvdhNraurvVMmxzufgFT1NO089\n1FTX8+Qb08zMDMjXkJGZmTWRA8HMzAAHgpmZpRwIZmYGOBDMzCzlQDAzM8CBYGZmKQeCmZkB8P
8B\nKgXY9oUcA04AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10e956438>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "\n",
    "counter = Counter(nums_words_per_sent)\n",
    "# Plot in increasing sentence length; Counter keys are not in numeric order, and an\n",
    "# unsorted x axis makes the line jump back and forth.\n",
    "lengths = sorted(counter)\n",
    "plt.plot(lengths, [counter[length] for length in lengths])\n",
    "plt.xlabel(\"words per sentence\")\n",
    "plt.ylabel(\"number of sentences\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEACAYAAACznAEdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEMNJREFUeJzt3W+sXHWdx/H3Rwv4l253k95mKRYMioWsf8im4BLjVVwE\nTQqPCGhckA1P0Eh0/dO6D6iPEBPjwq5sYtRuJSgpyoaaEKlNd0w0cVGRhdBaatyW0t1e4kow8KC0\n8N0H8wOHyy1/ZubOzB3er2SSc373nDm/L0Pm09/vnDMnVYUkSa8adwckSZPBQJAkAQaCJKkxECRJ\ngIEgSWoMBEkS8BICIcm3kswlua+nbUWS7Un2JLkryfKev21MsjfJ7iTn97SfleS+JA8m+afhlyJJ\nGsRLGSFsBj44r20DsKOqTgd2AhsBkpwBXAKsBS4EbkqSts+/An9fVW8F3ppk/ntKksboRQOhqn4K\nPDqv+SJgS1veAlzcltcDt1bV0araB+wF1iVZBbyxqn7RtvtOzz6SpAnQ7zmElVU1B1BVh4CVrf0k\n4EDPdgdb20nAwz3tD7c2SdKEGNZJZX//QpKWuGV97jeXZKaq5tp00COt/SBwcs92q1vbsdoXlMSA\nkaQ+VFVefKuFvdQRQtrrGduAK9ry5cAdPe2XJjk+yanAacDdbVrpsSTr2knmv+vZZ0FVNbWva6+9\ndux9sDbrs77pew3qRUcISb4LzAJ/keQh4Frgy8BtSa4E9tO9soiq2pVkK7ALOAJcXX/q5SeAfwNe\nA9xZVT8auPeSpKF50UCoqo8c408fOMb21wHXLdD+K+CvXlbvJEkj453KYzA7OzvuLiyaaa4NrG+p\nm/b6BpVhzDsNW5KaxH5J0iRLQo3gpLIkacoZCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIA\nA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMhJdl1apTSDLy16pVp4y7dEmvAD4P4WXoPg56HP3K\nUJ6XKmm6+TwESdJQGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUG\ngiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiRgwEBIsjHJA0nuS3JLkuOTrEiyPcmeJHclWT5v\n+71Jdic5f/DuS5KGpe9ASLIGuAp4V1W9HVgGXAZsAHZU1enATmBj2/4M4BJgLXAhcFO6DymWJE2A\nQUYIfwSeBF6fZBnwWuAgcBGwpW2zBbi4La8Hbq2qo1W1D9gLrBvg+JKkIeo7EKrqUeCrwEN0g+Cx\nqtoBzFTVXNvmELCy7XIScKDnLQ62NknSBFjW745J3gx8GlgDPAbcluSjQM3bdP76S7Jp06Znl2dn\nZ5mdne2rn5I0rTqdDp1OZ2jvl6q+vq9Jcgnwt1V1VVv/GHAO8H5gtqrmkqwC/qOq1ibZAFRVXd+2\n/xFwbVX95wLvXf32azF1T3mMo19hEv97SJosSaiqvs/NDnIOYQ9wTpLXtJPD5wG7gG3AFW2by4E7\n2vI24NJ2JdKpwGnA3QMcX5I0RH1PGVXVfyX5DvAr4Cng18A3gDcCW5NcCeyne2URVbUryVa6oXEE\nuHoihwGS9ArV95TRYnLK6HlHdspI0osa55SRJGmKGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJj\nIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCBngegkbphPbT26M1M7OGQ4f2jfy4ksbD5yG8\nDON8HoLPYZD0YnwegiRpKAwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAg\nSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkpqBAiHJ8iS3Jdmd5IEkZydZkWR7\nkj1J7kqyvGf7jUn2tu3PH7z7kqRhGX
SEcANwZ1WtBd4B/AbYAOyoqtOBncBGgCRnAJcAa4ELgZvS\nfWq9JGkC9B0ISU4E3lNVmwGq6mhVPQZcBGxpm20BLm7L64Fb23b7gL3Aun6PL0karkFGCKcCv0+y\nOck9Sb6R5HXATFXNAVTVIWBl2/4k4EDP/gdbmyRpAgwSCMuAs4CvV9VZwBN0p4tq3nbz1yVJE2jZ\nAPs+DByoql+29R/QDYS5JDNVNZdkFfBI+/tB4OSe/Ve3tgVt2rTp2eXZ2VlmZ2cH6KokTZ9Op0On\n0xna+6Wq/3/AJ/kJcFVVPZjkWuB17U9/qKrrk3wBWFFVG9pJ5VuAs+lOFf0YeEst0IEkCzWPXfcc\n+Dj6Nb7jTuLnIGlhSaiqvi/WGWSEAPAp4JYkxwG/Az4OvBrYmuRKYD/dK4uoql1JtgK7gCPA1RP5\nrS9Jr1ADjRAWiyOE5x15bMedxM9B0sIGHSF4p7IkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIk\nwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElS\nYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVKzbNwdeLmeeOIJbr/99nF3Q5KmzpIL\nhM2bN/PZz/4zxx23bqTHPXLk7pEeT5JGbckFwtNPPw18kMcfv3Gkxz3hhE8CD470mJI0Sp5DkCQB\nBoIkqVlyU0YapRNIMtIjzsys4dChfSM9pqQuA0Ev4DBQIz3i3NxoA0jSnzhlJEkChhAISV6V5J4k\n29r6iiTbk+xJcleS5T3bbkyyN8nuJOcPemxJ0vAMY4RwDbCrZ30DsKOqTgd2AhsBkpwBXAKsBS4E\nbsqoJ6glScc0UCAkWQ18CPhmT/NFwJa2vAW4uC2vB26tqqNVtQ/YC4z27jJJ0jENOkL4GvA5nnvm\ncaaq5gCq6hCwsrWfBBzo2e5ga5MkTYC+AyHJh4G5qroXeKGpn9FepiJJ6ssgl52eC6xP8iHgtcAb\nk9wMHEoyU1VzSVYBj7TtDwIn9+y/urUtaNOmTc8uz87OMjs7O0BXJWn6dDodOp3O0N4vVYP/Az7J\ne4F/qKr1Sb4C/F9VXZ/kC8CKqtrQTirfApxNd6rox8BbaoEOJFmoGYAbb7yRz3/+txw+PPrfMjp8\n+OuMZ8CTV9BxwzD+n5ReiZJQVX1frLMYN6Z9Gdia5EpgP90ri6iqXUm20r0i6Qhw9TG/9SVJIzeU\nQKiqnwA/act/AD5wjO2uA64bxjElScPlncqSJMBAkCQ1BoIkCTAQJEmNgSBJAnwegibO6B/KAz6Y\nRwIDQRNn9A/lAR/MI4FTRpKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAk\nAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiS\nGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElq+g6EJKuT7EzyQJL7k3yqta9Isj3JniR3JVnes8/G\nJHuT7E5y/jAKkCQNxyAjhKPAZ6rqTODdwCeSvA3YAOyoqtOBncBGgCRnAJcAa4ELgZuSZJDOS5KG\np+9AqKpDVXVvW34c2A2sBi4CtrTNtgAXt+X1wK1VdbSq9gF7gXX9Hl+SNFxDOYeQ5BTgncDPgZmq\nmoNuaAAr22YnAQd6djvY2iRJE2DZoG+Q5A3A94FrqurxJDVvk/nrL8mmTZueXZ6dnWV2drbfLkrS\nVOp0OnQ6naG930CBkGQZ3TC4uaruaM1zSWaqai7JKuCR1n4QOLln99WtbUG9gSBJer75/1j+0pe+\nNND7DTpl9G1gV1Xd0NO2DbiiLV8O3NHTfmmS45OcCpwG3D3g8SVJQ9L3CCHJucBHgfuT/Jru1NAX\nge
uBrUmuBPbTvbKIqtqVZCuwCzgCXF1VfU0nSZKGr+9AqKqfAa8+xp8/cIx9rgOu6/eYkqTF453K\nkiTAQJAkNQNfdipNhxMYx43zMzNrOHRo38iPKy3EQJAAOEyft8wMZG7OX2/R5HDKSJIEGAiSpMZA\nkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAg\nSJIaA0GSBBgIkqTGQJAkAQaC9Iq0atUpJBn5a9WqU8Zdul7AsnF3QNLozc3tB2oMx83Ij6mXzhGC\nJAkwECRJjVNG0lidQOI0iiaDgSCN1WHGMZcPhpCezykjSRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSp\nGXkgJLkgyW+SPJjkC6M+vqRxOsHfUJpgIw2EJK8C/gX4IHAmcFmSt42yD5OhM+4OLKLOuDuwyDrj\n7sAi6yzy+z9z38VoX93fboJOZ7HrW9pGPUJYB+ytqv1VdQS4FbhoxH2YAJ1xd2ARdcbdgUXWGXcH\nFlln3B1YJN2Ryfve9z5HJi9g1IFwEnCgZ/3h1iZJi+iZkcm1jGNkslQsuZ+uOO6440h+yIkn/vdI\nj/vkk/eP9HiSNGqpGt3vqCQ5B9hUVRe09Q1AVdX187Ybx4+7SNKSV1V9/1DVqAPh1cAe4Dzgf4G7\ngcuqavfIOiFJWtBIp4yq6qkknwS20z1/8S3DQJImw0hHCJKkyTVRdypP201rSVYn2ZnkgST3J/lU\na1+RZHuSPUnuSrJ83H3tV5JXJbknyba2Pk21LU9yW5Ld7TM8e8rq29jqui/JLUmOX8r1JflWkrkk\n9/W0HbOeVv/e9vmeP55ev3THqO8rrf/3JvlBkhN7/vay65uYQJjSm9aOAp+pqjOBdwOfaDVtAHZU\n1enATmDjGPs4qGuAXT3r01TbDcCdVbUWeAfwG6akviRrgKuAd1XV2+lOH1/G0q5vM93vj14L1pPk\nDOASYC1wIXBTMvGPrluovu3AmVX1TmAvA9Y3MYHAFN60VlWHquretvw4sBtYTbeuLW2zLcDF4+nh\nYJKsBj4EfLOneVpqOxF4T1VtBqiqo1X1GFNSH/BH4Eng9UmWAa8FDrKE66uqnwKPzms+Vj3rgVvb\n57qP7pfpulH0s18L1VdVO6rq6bb6c7rfL9BnfZMUCFN901qSU4B30v3QZqpqDrqhAawcX88G8jXg\nczz3GZDTUtupwO+TbG5TYt9I8jqmpL6qehT4KvAQ3SB4rKp2MCX19Vh5jHrmf98cZOl/31wJ3NmW\n+6pvkgJhaiV5A/B94Jo2Uph/Jn/JndlP8mFgro2AXmgouuRqa5YBZwFfr6qzgCfoTj8s+c8OIMmb\ngU8Da4C/pDtS+ChTUt8LmLZ6AEjyj8CRqvreIO8zSYFwEHhTz/rq1rakteH494Gbq+qO1jyXZKb9\nfRXwyLj6N4BzgfVJfgd8D3h/kpuBQ1NQG3RHqAeq6pdt/Qd0A2IaPjuAvwZ+VlV/qKqngH8H/obp\nqe8Zx6rnIHByz3ZL9vsmyRV0p24/0tPcV32TFAi/AE5LsibJ8cClwLYx92kYvg3sqqobetq2AVe0\n5cuBO+bvNOmq6otV9aaqejPdz2pnVX0M+CFLvDaANs1wIMlbW9N5wANMwWfX7AHOSfKadrLxPLoX\nByz1+sJzR6zHqmcbcGm7supU4DS6N8pOuufUl+QCutO266vqcM92/dVXVRPzAi6g+z/qXmDDuPsz\nhHrOBZ4C7gV+DdzTavxzYEerdTvwZ+Pu64B1vhfY1panpja6Vxb9on1+twPLp6y+z9ENufvonnA9\nbinXB3wX+B+6v2T3EPBxYMWx6qF7Rc5v6V7scf64+99nfXuB/e275R7gpkHq88Y0SRIwWVNGkqQx\nMhAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAfD/JPaxADW4
SIwAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10e94a6d8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.hist(nums_sents)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEACAYAAABfxaZOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAH9VJREFUeJzt3XuQXGWd//H3d+6TSRiSQIibkAAikLAgCKIrqzSCgLoS\n1gsKKz8ChbWKi9buekn4aTHZxQXdcvmxWtbWCmK8LQvrhbBSlZgKzcpyR2IiCUMUSEJMBkhCIBeS\nmcn398dzOt3T0z3T1+nT3Z9X1VSfPn366W9Oks955jlPn2PujoiINIeWWhcgIiITR6EvItJEFPoi\nIk1EoS8i0kQU+iIiTUShLyLSRMYNfTO73cwGzGxN1vrrzGy9ma01s5sz1i82sw3RaxdUo2gRESlN\nWwHb3AF8C/hBaoWZJYAPAae4+5CZHRGtnwdcCswDZgMrzewtri8DiIjEwrg9fXd/ENiZtfozwM3u\nPhRt80q0fgFwp7sPufsLwAbgrMqVKyIi5Sh1TP8E4D1m9oiZ3W9mZ0TrZwGbM7bbEq0TEZEYKGR4\nJ9/7prr7O83s7cDdwHGVK0tERKqh1NDfDPwMwN0fN7NhM5tO6NnPydhudrRuFDPTOL+ISAnc3Up9\nb6HDOxb9pPwCeC+AmZ0AdLj7dmAZ8HEz6zCzY4HjgcfyNerusf+54YYbal6D6lSd9VxnPdRYT3WW\na9yevpn9BEgA081sE3AD8D3gDjNbC+wH/k8U4uvM7C5gHTAIXOuVqFJERCpi3NB398vzvHRFnu1v\nAm4qpygREakOfSN3HIlEotYlFER1VpbqrJx6qBHqp85yWa1GX8xMIz8iIkUyM3wCTuSKiEgDUOiL\niDQRhb6ISBNR6IuINBGFvohIE1Hoi4g0EYW+iEgTUeiLiDSR2If+tm3w3/9d6ypERBpD7EP/scfg\n1ltrXYWISGOIfegPDcHu3bWuQkSkMcQ+9IeHFfoiIpUS+9BXT19EpHLqIvT37Kl1FSIijaEuQn/3\nbrj+erj33lpXIyJS3+oi9Pftg7Vr4fnn4d/+LYzzi4hI8eoi9AFefBFefx2uuw4GBmpbk4hIvRo3\n9M3sdjMbMLM1OV77ezM7aGbTMtYtNrMNZrbezC4ot8BUr37zZnj55XAQeOmlclsVEWlOhfT07wAu\nzF5pZrOB9wEbM9bNAy4F5gHvB75jZiXf1gvSPf3t22HLlrCs0BcRKc24oe/uDwI7c7x0C/DFrHUL\ngDvdfcjdXwA2AGeVU2Aq9EGhLyJSrpLG9M3sYmCzu6/NemkWsDnj+ZZoXckU+iIildNW7BvMrBu4\nnjC0U3WZob91a3hU6IuIlKbo0AfeDBwD/DYar58N/MbMziL07OdkbDs7WpdTX1/foeVEIkEikRi1\nTWboDw6GR4W+iDSLZDJJMpmsWHvm7uNvZHYMcK+7n5LjteeBt7n7TjObD/wYeAdhWOdXwFs8x4eY\nWa7Vo3z1q3DjjdDTE76Ze9hh8O5363LLItKczAx3L3mCTCFTNn8CPAScYGabzOyqrE0cMAB3Xwfc\nBawD7gOuLSjZx5Dq6c+Jfn94y1vU0xcRKdW4wzvufvk4rx+X9fwm4KYy6zpkaAhaWkLor18Pb34z\nPPpopVoXEWkudfGN3N5eOPro8HzGDNi/v7Y1iYjUq7oI/Q9/GBYuhPZ2mDZt5MldEREpXF2E/hln\nwNlnw5QpCn0RkXLEPvSHh6EtOvOg0BcRKU/sQ39oCFpbw7JCX0SkPHUR+qme/kc+AiefrNAXESlV\nKd/InVCZod/XBwcPhnXuUN71O0VEmk9d9fQhzNlvaQnhLyIixam70IfwXEM8IiLFi33oZ87eSWlr\ng+9+N1yXR0RECldXY/opbW2wbVv6UssiIlKY2Pf0M6dsprS1wb59sHt3bWoSEalXdRH6uXr6Cn0R\nkeIp9EVEmkhdh/6ePbWpSUSkXsU+9P
PN3lFPX0SkeLEPfQ3viIhUjkJfRKSJ1EXo55uyuWcPPP+8\nvp0rIlKoQm6MfruZDZjZmox13zCz9Wa22sx+amaHZby22Mw2RK9fUG6BY/X0h4bgL/8SHn643E8R\nEWkOhfT07wAuzFq3AjjZ3U8DNgCLAcxsPnApMA94P/Ads/KuhTlW6AP098PeveV8gohI8xg39N39\nQWBn1rqV7p66zuUjwOxo+WLgTncfcvcXCAeEs8opcKzZOwBvvKEbpYuIFKoSY/pXA/dFy7OAzRmv\nbYnWlSxXT7+9PR36EIJfRETGV9YF18zs/wKD7v4fpby/r6/v0HIikSCRSIzaJt/wTuaQjkJfRBpV\nMpkkmUxWrL2SQ9/MFgIfAN6bsXoLcHTG89nRupwyQz+ffLN3BgfTzzW8IyKNKrtDvGTJkrLaK3R4\nx6Kf8MTsIuCLwMXunhm5y4BPmFmHmR0LHA88Vk6B+Xr6AJMnh0f19EVECjNuT9/MfgIkgOlmtgm4\nAbge6AB+FU3OecTdr3X3dWZ2F7AOGASudXcvp8CxQn/q1PAFLYW+iEhhxg19d788x+o7xtj+JuCm\ncorKNFboz5oV7pWr4R0RkcLE/s5Z+aZsAnzrW3Dfferpi4gUqm4vwwDQ2Qnd3Qp9EZFCxTr0Dx4E\nM2jJqjIV+u3t0NWl4R0RkULFOvRz9fJhdOirpy8iUpjYh372eD6MDP3OToW+iEih6j70NbwjIlK4\nWId+rpk7oOEdEZFSxTr0C+3pK/RFRApT96Hf2anhHRGRQtV16Le1qacvIlKM2Ie+pmyKiFROrEN/\n377wjdtsbW3hYGCm4R0RkWLEOvT37IGentHr29pCLx/U0xcRKYZCX0SkiTRE6Gt4R0SkMHUf+roM\ng4hI4Roi9Pfvh/LuzyUi0hzqPvRbWsJy5o3SRUQkt7oPfdDJXBGRQo0b+mZ2u5kNmNmajHVTzWyF\nmfWb2XIz6814bbGZbTCz9WZ2QTnFFRr6nZ1hTr+IiIytkJ7+HcCFWesWASvd/URgFbAYwMzmA5cC\n84D3A98xMyu1uHyh394+MvR1y0QRkcKMG/ru/iCwM2v1AmBptLwUuCRavhi4092H3P0FYANwVqnF\naXhHRKSySh3Tn+HuAwDuvg2YEa2fBWzO2G5LtK4khYZ+dzfs3Qvbt5f6SSIizSHHNSxLUtKEyb6+\nvkPLiUSCRCIx4vVievorV8Ly5eFRRKRRJJNJkslkxdorNfQHzOwodx8ws5nAS9H6LcDRGdvNjtbl\nlBn6ueQL/UmTRq7v7oatW+H11wusXkSkTmR3iJcsWVJWe4UO71j0k7IMWBgtXwnck7H+E2bWYWbH\nAscDj5Va3J49IeCznXMO/OhH6efd3bBjhy7HICIynnF7+mb2EyABTDezTcANwM3A3WZ2NbCRMGMH\nd19nZncB64BB4Fr30r8rm6+n39ICU6emn3d1hfF8hb6IyNjGDX13vzzPS+fn2f4m4KZyikrJF/rZ\nurth2zbN4BERGU9dfiM3m3r6IiKFqdTsnaoopqe/fbsuuiYiMp7Y9vSffDL04KdMGX/brq5wIlfD\nOyIiY4tt6C9eDF/7Wvom6GPp7g69fA3viIiMLbah/+yzcO65hW3b1RUeh4fDj4iI5Bbb0N++HaZP\nL2zb7u70snr7IiL5xTL0DxwI4/OHHVbY9qmePmhcX0RkLLEM/e3bYdo0KPSizOrpi4gUJrahX+jQ\nDozs6Sv0RUTyi23oH3FE4dtn9vQ1vCMikl9sQ7+Unv6kSerpi4iMJZah/8orxYV+qqc/dapCX0Rk\nLLEM/VJ7+lOnanhHRGQsDRH6qZ7+tGnq6YuIjKUhQr+rK1yuYcoUhb6IyFhiGfr79uW+Y1Y+3d1h\n+87OMLyzaRMMDVWvPhGRehXL0B8cHHnj8/EceSR85jMh9Pfvh6uvhv/5n+rVJyJSrxoi9Lu64Oab\n06
G/f3/4bUFEREZqiNBP6eoKgT84GK7fIyIiI5UV+ma22MyeNrM1ZvZjM+sws6lmtsLM+s1suZn1\nFttuqaGfGtMfGtIJXRGRXEoOfTObC3wKON3dTyXcevEyYBGw0t1PBFYBi4ttu5zQ378/hL56+iIi\no5XT038NOAD0mFkb0A1sARYAS6NtlgKXFNuwhndERKqj5NB3953AN4FNhLDf5e4rgaPcfSDaZhsw\no9i2NbwjIlIdBdyBNjczOw74W2AusAu428z+CvCsTbOfH9LX13doOZFIkEgkgPJC/5VX1NMXkcaR\nTCZJJpMVa6/k0AfOBP7X3XcAmNnPgXcBA2Z2lLsPmNlM4KV8DWSGfqZKjOmrpy8ijSCzQwywZMmS\nstorZ0y/H3inmXWZmQHnAeuAZcDCaJsrgXuKbbjU0O/oCD18ncgVEcmt5J6+u//WzH4APAkMA08B\n/w5MAe4ys6uBjcClxbZdaui3t4f3anhHRCS3coZ3cPd/Bv45a/UO4Pxy2i2npz84qOEdEZF8Guob\nue3tGt4RERlLw4V+anhHPX0RkdEaMvTV0xcRya2hQr+jI/Twh4cV+iIiuTRU6Le3py+prOEdEZHR\nYhf6Bw+Gn9bW4t+bGfrq6YuIjBa70E/18s2Kf29HB+zdG5YV+iIio8U29EvR3p4OfQ3viIiM1rCh\nr56+iMhoDRX6HR06kSsiMpaGCn319EVExtZwoZ+i0BcRGa2hQr+jIzy2t2t4R0Qkl4YK/dT7Jk1S\nT19EJJdYhn5biRd8ToV+T496+iIiucQy9Msd3unpUU9fRCSXhgr9zJ6+Ql9EZLSGCn2zcM2eSZPC\n8I57ZWsTEal3DRX6EIZ4OjuhpQUGBuCHP6xcbSIi9a6s0DezXjO728zWm9nTZvYOM5tqZivMrN/M\nlptZbzFtDg2VF/rt7eFEcEcHLFsGX/5y6W2JiDSacnv6twL3ufs84K3AM8AiYKW7nwisAhYX02C5\nPf329vDT2QkPPQRbt4YfEREpI/TN7DDg3e5+B4C7D7n7LmABsDTabClwSTHtViL029qguxuSSZgy\nBZ58svT2REQaSTk9/WOBV8zsDjP7jZn9u5lNAo5y9wEAd98GzCim0UqM6be1wTXXwMaNcPnl8MQT\npbcnItJISvwa1KH3vg34rLs/YWa3EIZ2sufM5J1D09fXd2g5kUiQSCQqNryzaBHMnAmnnw4f/zh8\n4QsweXLp7YqI1EIymSSZTFasPfMS5zWa2VHAw+5+XPT8zwmh/2Yg4e4DZjYTuD8a889+v+f67O9/\nH+6/H5YuHfVSQebNgzPOgB/9KL3uox+F886Dz3ymtDZFROLCzHD3Eu4tGJQ8vBMN4Ww2sxOiVecB\nTwPLgIXRuiuBe4pptxLDO9nvnz8fXn659DZFRBpFOcM7AJ8Dfmxm7cBzwFVAK3CXmV0NbAQuLabB\nSp3IzdTTA9u3l96miEijKCv03f23wNtzvHR+qW1WI/QnT4ZNm0pvU0SkUTTkN3Kz3z95MuzeXV5d\nIiKNoOFCP19PX6EvItKgoa+evohIbrEL/QMHwiUUSpX6clamnh6FvogIxDD09+9P3wylFPmGd/bs\nKa8uEZFGELvQP3Cg/NDX8I6ISG6xDP1KD+8o9EVEgliGfjWGdxT6IiINGvrZwzuTJsG+fXDwYHm1\niYjUu3Ivw1Bx5Yb+FVfA9Okj17W0QFdXCP6envLqExGpZw0X+u96V+71qSEehb6INLOGG97JR+P6\nIiIxDP1y5+nno9AXEYlh6KunLyJSPbEM/XLm6eczeTLs2AELFlS+bRGRehHL0K9GT/+ww+C552DZ\nMhgaqnz7IiL1oGlCv7cXNm4My6+9Vvn2RUTqQdOE/uGHp++etWtX5dsXEakHTRP6mT39V1+tfPsi\nIvWg7NA3sxYz+42ZLYueTzWzFWbWb2bLzay3mPaqNWUzM/TV0xeR
ZlWJnv7ngXUZzxcBK939RGAV\nsLiYxqo5vPPyy2FZoS8izaqs0Dez2cAHgNsyVi8AlkbLS4FLimmzWlM2ezN+39Dwjog0q3J7+rcA\nXwQ8Y91R7j4A4O7bgBnFNFjNMf0U9fRFpFmVfME1M/sgMODuq80sMcamnu+Fvr6+Q8uJRIJzzklw\n4EB5N0bP5/DDw2Nvr0JfROpHMpkkmUxWrD1zz5vJY7/R7J+ATwJDQDcwBfg5cCaQcPcBM5sJ3O/u\n83K837M/e3AwXPt+cLCkksb0/PNw3HFw6qlw/vnwzW9W/jNERKrNzHB3K/X9JQ/vuPv17j7H3Y8D\nPgGscvcrgHuBhdFmVwL3FNpmtYZ2ID28M3euevoi0ryqMU//ZuB9ZtYPnBc9L0i1pmtCuAwDKPRF\npLlV5CYq7v4A8EC0vAM4v5R2qtnTb2sLF12bOxeeeaY6nyEiEnex+kZutaZrpsyZAyeckJ6yuW8f\ndHfDSy9V7zNFROIkdqFfrZ4+wJo1MH9+uMQyhJO5b7wBW7ZU7zNFROKkqUK/tTXcNH379vD8gQfC\no666KSLNoqlCH8Isnt27w7TQV1+FI49U6ItI82i60G9pgWnTwhDPrl1hnP+112D1arjttvHfLyJS\nz5ou9CE9xLNrFxx9dAj9X/8a7ruv+p8tIlJLFZmyWSnVnKefKRX6r76aDv1du2DPnup/tohILTVt\nTz81Y2fGjBD6f/wj7N1b/c8WEamlpg39P/whnNTt7VXoi0jziN3wTjW/nJUyfTo891wI/ClTQuhv\n3QpDQ9X/bBGRWopVT3+iQv+II0JP//DDwzV5Uj19jemLSKNr2tDfsCH09A87LNxGcccODe+ISONr\nytA/6aTQs0+Ffn9/GPJR6ItIo2vK0D/ttPAlrdTwziuvwNveFq7Dc/Bg9T9fRKRWmjL0e3pCbz/V\n0wf40IegqysEfz5nnqmTvSJS35oy9AHOOGNk6P/FX4RbNeY7mXvgADz5ZLgcs4hIvYrdlM0pUybm\ns77ylXCAmTwZli+HY48NoZ9vXH/37omvUUSk0mIX+kccMTGfdcIJ6eULLgiPPT2Fhb6ISL0qeXjH\nzGab2Soze9rM1prZ56L1U81shZn1m9lyM+sttM2JHN7JpdCevohIvSpnTH8I+Dt3Pxn4M+CzZnYS\nsAhY6e4nAquAxYU2OFEXXMsnc0x/eBgeeST9mkJfRBpByaHv7tvcfXW0vBtYD8wGFgBLo82WApcU\n2mate/qZwzuPPw5XXpl+LXUwUOiLSD2ryOwdMzsGOA14BDjK3QcgHBiAGYW2U+vQzxze2bRp5FCP\nevoi0gjKDn0zmwz8F/D5qMfvWZtkP88rTqG/cePI6ZkKfRFpBGXN3jGzNkLg/9Dd74lWD5jZUe4+\nYGYzgZfyvb+vr+/QciKRYP/+RM1DPzWMo9AXkThIJpMkk8mKtVfulM3vAevc/daMdcuAhcDXgSuB\ne3K8DxgZ+gBLlsRnTH/TphD67mCm0BeR2kgkEiQSiUPPlyxZUlZ7JYe+mZ0N/BWw1syeIgzjXE8I\n+7vM7GpgI3BpoW3Weninpycd7hs3hsA/cCDUpBO5ItIISg59d/9foDXPy+eX0matQ/9Nb4Lf/S4s\nb9oULsq2b1+oST19EWkETXvtnVzmzAk9/L17Q9gfeWR6XF+hLyKNIHaXYahl6M+dG3r4L70Ubpje\n2hpC/9pr4e67ob09DPeIiNQr9fQzzJ0bevoDAyH0u7tD6G/YEK65P326evoiUt8U+hl6e8M4fn//\nyNDfvj28Pm2aQl9E6ptCP8vcueESDDNmhHn7+/aFXj6opy8i9U+hn+WYY+DRR0f39GfPhj/5E4W+\niNS32IS+ezxC/5RTwh2yUqG/Y0e4ReLvfgenn67QF5H6FpvQHxoK33xtq/F8ojPOCDdHT4X+iy+G\nYZ3e3nBASoW+F3xFodH27Qt/
3nLaEBEpRWymbMahlw8h9GF06EM69G+5BbZuhW98o/j2f/nLcD/e\ntrYQ/O3t4R4C7e3pn8znxS6X+/5i2mprCwdqEakfCv0sc+aEWzaOFfpPPQUPPghf/3rxoffOd4ab\nsW/cGB6HhmBwMMz/Hxwsfzn7+e7dlWk312cMDYXgn4iDUbUOZjpoSbNR6GcxgxUr4K1vTYf+rFnh\ntVToP/dcCO116+Dkk4trf/r0cE/e//xP+Ou/DuHT0RGu+1Nv3EPwV+pgkuvAsndvZdvN/ozW1toe\njMp9f0tsBmilXij0czj99PDY3Q2bN4cDAKRDv78fPvYxuPfe4kMfYOFCuPHGEPr1zCwdPvUoddCq\n9MEkc/mNN+C116r3Ga2ttRnaq9T7W/NdvUuqJhahPzwMH/4wTJlS60pG6u6GP/4xDPdACP0XXwxh\nd9VV8A//AIsWFd/uhRfCNdfAM8/ASSdVtmYpXCMctIaHxx7eK3d5//4wRFitz8j8O4jDeapi39/a\nWn9DhLEI/c2bw/Vu1qypdSUjdXeHx1RvvrMT1q6FU0+FRAI+/vH0dXqK0dYGn/wkLF0KN91U2Hu2\nbAlfGluwoP7+kUl1pGa71XrGW6lSB61Czk2Vs7xnT2U+48CBMPMuNfsOCjtolXsA6u4OowK9vZXZ\n77H459LfD/PmpU+YxkUq9M88Mzx2doa/8Pnzw/Jll8HXvga33pq/jXyuvDL0+G+8cexfcQcH4V//\nNRwcpk2Dn/4Uvvtd6Ooq/jNFSnXwYAi68X6Ghwvbrtht49D2wYPpg2xXV3q5tTW9PN5PqsPmnt6n\n2c9TB5hU22+8kd6uEmIR+s8+CyecUOsqRkvdRev448Nj6pxD6iDwj/8YDgBXX50e9y/Un/5p+Ibv\nypUh/HP59a/DFT7f9CZ46KHwreCrroJzz4Wf/xxmziz+zyTFSfVG4xpEE1VH6reKYgKumG0L2b6z\nM0x4qEbbhWzb0tIYv2XHIvT7++MZ+jt3hsfUDIlU6Kfm8h9xROipX3ZZCOO3vz28Vui5iYUL4fvf\nHx36AwPwpS/BqlXwL/8CH/1o+h/bnXeGcwnveAfccw+cdlo5f8Kx5erdNUvIZS4XEhjVDqFJkyY2\n4LK30SyhxhGL0H/2WfjgB2tdxWiLF4defLZTT00vX3NNmG//0EPws5+F8xLHHBMOAGedFS7glit0\nhofh9ddDiM+fD5Mnh5Nm3/52GL8HuO46eOIJeOSR0W28/HKYZdTRAR/6UHUC0T2MKU50wGU+7+gY\nO/Cq3fOsxxN1ImMxr9G1AMzMU5/9qU/BV74SAjLOdu6ET386zLHP58CBcJ2exx+Hxx4Ls39yBWcq\nVG67Lbx+5pnw8MOhjUsuCT348UJr9epwPuH00+GrXx35OZUIZvXuROLHzHD3krsiVQt9M7sI+H+E\n6/vc7u5fz3rda3XAiZPly+EjHwlDQt/4RpjVU0zPcsuWcJA48cRwANEJXpHGVm7oV6UvZ2YtwLeB\nC4GTgcvMrC5npCeTyaq2f+GF8Pvfwx/+AFdcUfxQwqxZ8MADsGVLknPPhW3bqlNnpVR7f1aK6qyc\neqgR6qfOclXrF/izgA3uvtHdB4E7gQVV+qyqmoh/CDNnhnHrUk2aBO95T5KLLgoneFevrlxtlVYv\n/7FUZ+XUQ41QP3WWq1oncmcBmzOev0g4EEiVmMENN4STwp/+dDixrDF5EcmmWGgwH/tYuAKoAl9E\ncqnKiVwzeyfQ5+4XRc8XAZ55MtfMdBZXRKQEsZu9Y2atQD9wHrAVeAy4zN3XV/zDRESkYFUZ03f3\nYTP7G2AF6SmbCnwRkRqr2ZezRERk4tXkdJ+ZXWRmz5jZs2b25VrUkI+ZvWBmvzWzp8zssWjdVDNb\nYWb9ZrbczCp0kdOi6rrdzAbMbE3Gurx1mdliM9tgZuvN7IIa1niDmb1oZr+Jfi6qZY3R5842s1
Vm\n9rSZrTWzz0Xr47Y/s+u8Llofq31qZp1m9mj0f+ZpM/unaH1s9ucYNcZqX2Z8dktUz7LoeeX2pbtP\n6A/hQPN7YC7QDqwGTproOsao7zlgata6rwNfipa/DNxcg7r+HDgNWDNeXcB84CnC8N0x0f62GtV4\nA/B3ObadV4sao8+eCZwWLU8mnH86KYb7M1+dcdynk6LHVuAR4OwY7s9cNcZuX0af/7fAj4Bl0fOK\n7cta9PTj/sUtY/RvQAuApdHyUuCSCa0IcPcHgZ1Zq/PVdTFwp7sPufsLwAYm4HsSeWqEsE+zLaAG\nNQK4+zZ3Xx0t7wbWA7OJ3/7MVees6OW47dPoQuR0Ev7/7CR++zNXjRCzfWlms4EPALdl1VORfVmL\n0M/1xa1ZebatBQd+ZWaPm9k10bqj3H0Awn9EoMh7ZVXNjDx1Ze/jLdR2H/+Nma02s9syfi2NRY1m\ndgzht5NHyP/3XPNaM+p8NFoVq30aDUc8BWwDku6+jpjtzzw1Qsz2JXAL8EVCFqVUbF/qKzyjne3u\nbyMcaT9rZu9m5M4nx/O4iGNd3wGOc/fTCP/Zvlnjeg4xs8nAfwGfj3rSsfx7zlFn7Papux9099MJ\nvzG928wSxGx/ZtX4HjM7h5jtSzP7IDAQ/YY31lz8kvdlLUJ/CzAn4/nsaF0suPvW6PFl4BeEX5UG\nzOwoADObCbxUuwpHyFfXFuDojO1qto/d/WWPBh+B75L+1bOmNZpZGyFIf+ju90SrY7c/c9UZ130a\n1fYacB9wJjHcnxk1/hI4M4b78mzgYjN7DvgP4L1m9kNgW6X2ZS1C/3HgeDOba2YdwCeAZTWoYxQz\nmxT1qjCzHuACYC2hvoXRZlcC9+RsoPqMkUf/fHUtAz5hZh1mdixwPOELchNeY/QPNOXDwO9iUCPA\n94B17p55h+M47s9RdcZtn5rZEalhETPrBt5HOLkYm/2Zp8bVcduX7n69u89x9+MI2bjK3a8A7qVS\n+3KizkZnnZm+iDATYQOwqBY15KnrWMJsoqcIYb8oWj8NWBnVvAI4vAa1/QT4I7Af2ARcBUzNVxew\nmHAmfz1wQQ1r/AGwJtqvvyCMTdasxuhzzwaGM/6ufxP9m8z791yj/ZmvzljtU+CUqLangN8CX4jW\nx2Z/jlFjrPZlVs3nkJ69U7F9qS9niYg0EZ3IFRFpIgp9EZEmotAXEWkiCn0RkSai0BcRaSIKfRGR\nJqLQFxFpIgp9EZEm8v8BFfxqFwpqM18AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x107dc1a58>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "counter = Counter(nums_entities)\n",
    "plt.plot(list(counter.keys()), list(counter.values()))\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEACAYAAAC9Gb03AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFtpJREFUeJzt3XuQHeV55/HvIwlkxEUIExR0DSBAWMZcDAIHcMawBgFV\nUUIlMYJysBM7pAp2Xa7aDdgV184f63WcimtZjL22Ykxsp4JcGygbjLAFmCnCiouwdeGiuwTSSEIy\nmEsCMh6JZ//oIzyMJc2ZmTOnz+nz/VSdmu4+7/R5Xkb8puftt7sjM5EkVdeYsguQJI0ug16SKs6g\nl6SKM+glqeIMekmqOINekipu0KCPiNsjYmdErDpIm1sjYn1ErIiIMxtboiRpJOo5or8DuOxAb0bE\n5cBJmXkycD3wjQbVJklqgEGDPjMfBV45SJP5wHdrbZ8AJkbE5MaUJ0kaqUaM0U8FtvZb31bbJklq\nAZ6MlaSKG9eAfWwDpvdbn1bb9lsiwhvrSNIwZGYM93vrDfqovfbnHuAG4PsRcT7wambuPNCOqnwT\nte7ubrq7u5v+uS+/DKtXw3PPwY4d8Npr8Prrxdf+r74+uP56OPzw+l7jx0P0+6mX1b9mqXL/qtw3\nqH7/Ioad8UAdQR8R/wJ0Ae+NiC3AfwcOBTIzF2bm4oi4IiI2AG8AnxxRRdqvTNi+vQj0faG+7+uv\nfw2nnVa8pk2DqVOL5YkTi9dRRxVfZ84sAlxSZxk06DPzmjra3NiYcjrbnj2wZg3s2lWE+M9+Blu2\nFEfpW7fCYYcVAf6+9xWvP/mTYv3449995C1J/TVijF41XV1dQ/6eV16BH/0IFi+GJUvg2GNhyhSY\nNQvOOw+uvrpYnzoVJk1qfM1DMZz+tZMq96/KfYPq92+koplj5hGRVR6jH6of/AA+9Sm46CK48kq4\n/PIi0CWpv4hoyslYNVAmfOYzcO+9xZH83LllVySpypxH32Rf+hK85z2wYgWsXGnISxp9Dt00UW8v\nnHEGLF8OM2aUXY2kdjHSoRuDvknefhuuuw6OOw6+8pWyq5HUTgz6NnD//fDFL8KYMcWY/BFHlF2R\npHYy0qB3jH4U7d1bzKq54Qb49Kehp8eQl9R8zroZBb/+Ndx0U3H0PmVKceL1qKPKrkpSpzLoG+yt\nt2D+/OI+MYsWFVeuvuc9ZVclqZM5dNMge/fCrbfC5Mlw9NFw991w1lmGvKTyeUTfAG++WVzVuns3\nPPVUcfsCSWoVzroZoUz48z8vpk9+73vFzBpJaiRvgVCyhQuLK1wff9yQl9SaPKIfgb4+mD4dHngA\nTj+97GokVZXz6EuSCf/wD3DKKYa8pNbm0M0w9PXBX/xFMT/+vvvKrkaSDs6gH6JM+Ou/Lp7T+sQT\nMGFC2RVJ0sEZ9EN0993FiVdDXlK78GTsEOzeDXPmwLe+BRdfXHY1kjqFd69sovnzi2e63n572ZVI\n6iTOo2+Sn/60OPm6YUPZlUjS0Bj0dejrg2uuge9+Fw45pOxqJGlonEc/iLffhi98AU44AS69tOxq\nJGnoPKIfxC23FFe+fv3rZVciScPjydhBnHkm3HYbXHhh2ZVI6lTeAmEUrVwJO3fChz5UdiWSNHwG\n/QFkwmc/W4zPjx1bdjWSNHwG/QE88ghs3w5/9VdlVyJJI2PQH8CiRfCJT8A4T1dLanOejN2P7duL\nWw8vXw4zZpRdjaRO58nYUfCd78Cf/ZkhL6kaDPr9uPde+OM/LrsKSWoMh24G2LWreGrUzp0wfnzZ\n1UiSQzcNd8cd8NGPGvKSqsMj+n5efBFOO614sMipp5ZdjSQVmnJEHxHzImJNRKyLiJv28/57I+L+\niFgREU9HxCeGW1CZfvzj4mjekJdUJYMGfUSMAW4DLgPmAAsiYvaAZjcCKzLzTOAjwFciou1moN93\nH1xxRdlVSFJj1XNEPxdYn5kvZGYfsA
iYP6DNi8CRteUjgZczc0/jyhx9fX3w4INw+eVlVyJJjVXP\nUfdUYGu/9V6K8O/vH4GHImI7cATwscaU1zxLl8KsWTB5ctmVSFJjNWp45XPAysz8SEScBDwQER/I\nzP8Y2LC7u/ud5a6uLrq6uhpUwsjcdx9ceWXZVUgS9PT00NPT07D9DTrrJiLOB7ozc15t/WYgM/PL\n/dosBr6Ymf+vtv4QcFNmPjVgXy076+b974dvfxvmDvxbRZJK1oxZN8uAWRExMyIOBa4G7hnQZjXw\nn2oFTQZOATYNt6hme/754kKpc84puxJJarxBh24yc29E3AgsofjFcHtmro6I64u3cyHwJeCOiFgJ\nBPA3mfnL0Sy8kb72NViwAMZ4+ZikCur4C6ZefhlOPrl4mtT06WVXI0m/zVsgjNCtt8JVVxnykqqr\n7S5qaqR/+7di2Oaxx8quRJJGT0cP3Vx3HZx7Ltx4Y9mVSNKBOXQzAo8+ChdfXHYVkjS6Ojbo16+H\nN96A2QPv2iNJFdOxQX/nncXjAp1SKanqOjLmMougX7Cg7EokafR1ZNCvWgW/+hWcf37ZlUjS6OvI\noL/zTrj6aohhn8OWpPbRkUH/0EPeqVJS5+i4efR79sDEicXzYY88cvD2klQ259EP0bp1MGWKIS+p\nc3Rc0D/4IFxwQdlVSFLzdFzQ33UX/Omfll2FJDVPR43Rv/FG8UzYXbtgwoTSypCkIXGMfgiWLoWz\nzzbkJXWWjgr6Zcu8SEpS5+mooF+1Cj7wgbKrkKTm6qigf/ppOP30squQpObqmKB/6SXo7YXTTiu7\nEklqro4J+sWL4ZJL4NBDy65EkpqrI4I+E776Vbj22rIrkaTm64h59Bs3woc/DFu3+qARSe3HefR1\neOqp4iHghrykTtQR0fezn8E555RdhSSVo2OC/oMfLLsKSSpH5cfoM+GYY2DtWjjuuKZ+tCQ1hGP0\ng9i0CY44wpCX1LkqH/QO20jqdJUP+qeeMugldbbKB/0jj8DcuWVXIUnlqXTQr1oF27YVtz6QpE5V\n6aC//3646ioYN67sSiSpPJUO+p4e6OoquwpJKldl59H39cF73wubNxdfJaldNWUefUTMi4g1EbEu\nIm46QJuuiFgeEc9ExMPDLahRfv5zOOEEQ16SBh29jogxwG3AJcB2YFlE/DAz1/RrMxH4GnBpZm6L\niGNHq+B6LV0KF15YdhWSVL56jujnAusz84XM7AMWAfMHtLkGuCsztwFk5kuNLXPonnnGxwZKEtQX\n9FOBrf3We2vb+jsFOCYiHo6IZRHx8UYVOFzPPgvvf3/ZVUhS+Ro18XAccDZwMXA48FhEPJaZGxq0\n/yHJLIJ+zpwyPl2SWks9Qb8NmNFvfVptW3+9wEuZ+SvgVxHxCHAG8FtB393d/c5yV1cXXaMw/3HL\nFjjqKJg0qeG7lqRR19PTQ09PT8P2N+j0yogYC6ylOBm7A3gSWJCZq/u1mQ18FZgHjAeeAD6Wmc8N\n2FdTplcuXgy33AJLloz6R0nSqBvp9MpBj+gzc29E3AgsoRjTvz0zV0fE9cXbuTAz10TET4BVwF5g\n4cCQb6ZnnnHYRpL2qeQFU9ddVzwM/C//ctQ/SpJGnQ8e2Q9PxErSb1TuiP7tt+HII2HHjuKErCS1\nO4/oB9i8GY491pCXpH0qF/QO20jSu1Uu6J1xI0nvVrmg99YHkvRulQx6j+gl6TcqNetmz57iJOwv\nfgGHHz5qHyNJTeWsm342bIDjjzfkJam/SgX9E0/AueeWXYUktZZKBf3SpfD7v192FZLUWioV9MuX\nwznnlF2FJLWWSgX9xo0wa1bZVUhSa6lM0L/6Krz1FvzO75RdiSS1lsoE/ebNcNJJEMOegCRJ1VSZ\noF+/vgh6SdK7VSbovceNJO1fpYL+9NPLrkKSWk9lgt573EjS/lXiXjeZcNhh8PLL3v5AUvV4rxvg\npZ
dgwgRDXpL2pxJBv3UrTJ9edhWS1JoMekmquEoE/caNMHNm2VVIUmuqRNA/+CB0dZVdhSS1praf\ndfP223DkkdDbC5MmNXTXktQSOn7Wzb7HBhrykrR/bR/0vb0wdWrZVUhS62r7oN+2DaZNK7sKSWpd\nbR/0vb0GvSQdTNsH/datDt1I0sG0fdA//zyccELZVUhS62r7oN+0yaCXpIOpRNCfeGLZVUhS62rr\noP/3f4c334TJk8uuRJJaV1sH/ebNxbCNDwSXpAOrK+gjYl5ErImIdRFx00HanRsRfRFxVeNKPDCH\nbSRpcIMGfUSMAW4DLgPmAAsiYvYB2v0d8JNGF3kgBr0kDa6eI/q5wPrMfCEz+4BFwPz9tPvPwL8C\nuxpY30E540aSBldP0E8FtvZb761te0dETAH+KDP/D9C0EfN16+Dkk5v1aZLUnhp1MvYWoP/YfVPC\n/tlnYc6cZnySJLWvcXW02QbM6Lc+rbatv3OARRERwLHA5RHRl5n3DNxZd3f3O8tdXV10DfOJIb/8\nZTG9csaMwdtKUjvp6emhp6enYfsb9MEjETEWWAtcAuwAngQWZObqA7S/A7g3M+/ez3sNe/BITw98\n/vOwdGlDdidJLWukDx4Z9Ig+M/dGxI3AEoqhntszc3VEXF+8nQsHfstwixmKxx+H885rxidJUntr\n20cJzp8P11wDH/tYQ3YnSS2rYx8luGIFnH122VVIUutryyP611+H448vvo4d24DCJKmFdeQR/XPP\nwezZhrwk1aMtg37duiLoJUmDa8ug37IFZs4suwpJag9tG/TTp5ddhSS1h7YM+q1bvSJWkurVlkG/\nZYtBL0n1arugz3ToRpKGou2C/rXXikcHTpxYdiWS1B7aLuj3Ddv4nFhJqk9bBr3DNpJUP4Nekiqu\n7YJ+wwaYNavsKiSpfbRd0K9bB6ecUnYVktQ+2i7o166FU08tuwpJah9tdZviPXtgwoTiWbHjxzew\nMElqYR11m+KXXoKjjzbkJWko2irod+6EyZPLrkKS2otBL0kVZ9BLUsUZ9JJUcW0V9C++aNBL0lC1\nVdA//7yPEJSkoWqroN+4EU46qewqJKm9tE3QZ8KmTQa9JA1V2wT9yy/DmDFwzDFlVyJJ7aVtgn7j\nRjjxxLKrkKT201ZB77CNJA2dQS9JFdc2Qe+JWEkanrYJeo/oJWl4DHpJqri2ePDI7t0waRK88QaM\nHTsKhUlSC+uIB49s3lzc+sCQl6Sha4ugd9hGkoavrqCPiHkRsSYi1kXETft5/5qIWFl7PRoRpzey\nSINekoZv0KCPiDHAbcBlwBxgQUTMHtBsE/DhzDwD+B/APzaySINekoavniP6ucD6zHwhM/uARcD8\n/g0y8/HMfK22+jgwtZFFGvSSNHz1BP1UYGu/9V4OHuSfAu4fSVEDrV8Ps2Y1co+S1DnGNXJnEfER\n4JPAhQdq093d/c5yV1cXXV1dB93n7t3Q22vQS+ocPT099PT0NGx/g86jj4jzge7MnFdbvxnIzPzy\ngHYfAO4C5mXmxgPsa8jz6FeuhGuvhWeeGdK3SVJlNGMe/TJgVkTMjIhDgauBewYUMYMi5D9+oJAf\nrjVrYPbAU7+SpLoNOnSTmXsj4kZgCcUvhtszc3VEXF+8nQuBLwDHAF+PiAD6MnNuIwrs7YXp0xux\nJ0nqTHWN0Wfmj4FTB2z7Zr/lTwOfbmxphR07YMqU0dizJHWGlr8ydvt2OP74squQpPbV8kHvEb0k\njUzLB/327Qa9JI1ES9+meM8emDgRdu2Cww8fxcIkqYVV+jbFGzYU4/OGvCQNX0sH/apVcHpD74Mp\nSZ2npYN++XI444yyq5Ck9tbSQf/YY/ChD5VdhSS1t5Y9Gbt3Lxx9NGzZUjwvVpI6VWVPxj7/PBxz\njCEvSSPVskG/di2ceurg7SRJB9fSQe9dKyVp5Fo66D2il6SRM+gl
qeIMekmquJYM+tdfL15TD/YI\ncklSXVoy6J97rjiaH9OS1UlSe2nJKF2xAs46q+wqJKkaWjbozzyz7CokqRpaMuiXLzfoJalRWu5e\nN/seNrJjBxx1VJMKk6QWVrl73axfXzxsxJCXpMZouaB/8kn44AfLrkKSqqPlgv7RR+Gii8quQpKq\no6WCPhMeegg+/OGyK5Gk6mipoF+2DMaN8zmxktRILRX0Dz8MV14JMexzy5KkgVoq6FeudP68JDVa\nywX9GWeUXYUkVUvLXDC1e3fxjNhXX4Xx45tWkiS1vMpcMPXss3DyyYa8JDVaywS9wzaSNDpaJugf\negguuKDsKiSpelpijP7NN2HKFFi3Do47rmnlSFJbqMQY/eLFcO65hrwkjYa6gj4i5kXEmohYFxE3\nHaDNrRGxPiJWRMSQZsN/85tw7bVD+Q5JUr0GDfqIGAPcBlwGzAEWRMTsAW0uB07KzJOB64Fv1FvA\nww/D5s3VCPqenp6ySxhV9q99VblvUP3+jVQ9R/RzgfWZ+UJm9gGLgPkD2swHvguQmU8AEyNi8mA7\nzoS//Vvo7oZDDhla4a2o6v/Y7F/7qnLfoPr9G6l6gn4qsLXfem9t28HabNtPm3d56y34+78vLpBa\nsKCeUiVJw9H0k7GvvlpcGHX00fDAA/CjH8HYsc2uQpI6x6DTKyPifKA7M+fV1m8GMjO/3K/NN4CH\nM/P7tfU1wB9k5s4B+2reXE5JqpCRTK8cV0ebZcCsiJgJ7ACuBgYOttwD3AB8v/aL4dWBIT/SQiVJ\nwzNo0Gfm3oi4EVhCMdRze2aujojri7dzYWYujogrImID8AbwydEtW5JUr6ZeGStJar6mnYyt56Kr\nVhcRt0fEzohY1W/bpIhYEhFrI+InETGx33ufq11EtjoiLi2n6vpExLSI+GlEPBsRT0fEf6ltr0r/\nxkfEExGxvNbH/1nbXon+QXHNS0T8PCLuqa1XqW/PR8TK2s/vydq2KvVvYkT831q9z0bEeQ3tX2aO\n+oviF8oGYCZwCLACmN2Mz25wPy4EzgRW9dv2ZeBvass3AX9XW34fsJxieOz3av2PsvtwkL79LnBm\nbfkIYC0wuyr9q9U8ofZ1LPA4cEHF+vdZ4J+Be6r0b7NW8yZg0oBtVerfPwGfrC2PAyY2sn/NOqKv\n56KrlpeZjwKvDNg8H/hObfk7wB/Vlv8QWJSZezLzeWA9xX+HlpSZL2bmitryfwCrgWlUpH8Amflm\nbXE8xcHHK1SkfxExDbgC+Fa/zZXoW03w2yMQlehfRBwFXJSZdwDU6n6NBvavWUFfz0VX7eq4rM0w\nyswXgX23ZhvyRWStIiJ+j+Ivl8eByVXpX21oYznwItCTmc9Rnf79L+C/Af1PulWlb1D064GIWBYR\nn6ptq0r/TgBeiog7akNvCyNiAg3sX0vcvbJi2vrsdkQcAfwr8Jnakf3A/rRt/zLz7cw8i+IvlYsi\noosK9C8irgR21v4iO9gU5rbrWz8XZObZFH+13BARF1GBn13NOOBs4Gu1Pr4B3EwD+9esoN8GzOi3\nPq22rQp27ruvT0T8LrCrtn0bML1fu5bvc0SMowj572XmD2ubK9O/fTLzdWAxcA7V6N8FwB9GxCbg\nTuDiiPge8GIF+gZAZu6off0F8AOKoYoq/OygGOHYmplP1dbvogj+hvWvWUH/zkVXEXEoxUVX9zTp\nsxstePdR0z3AJ2rL1wE/7Lf96og4NCJOAGYBTzaryGH6NvBcZv7vftsq0b+IOHbfrIWIOAz4KMUJ\nrbbvX2Z+PjNnZOaJFP9v/TQzPw7cS5v3DSAiJtT+0iQiDgcuBZ6mAj87gNrwzNaIOKW26RLgWRrZ\nvyaeVZ5HMZNjPXBz2We5h9mHfwG2A28BWyguDJsEPFjr2xLg6H7tP0dxRnw1cGnZ9Q/StwuAvRQz\nopYDP6/9zI6pSP9Or/VpObAS
+K+17ZXoX7+a/4DfzLqpRN8oxrD3/bt8el9+VKV/tXrPoDggXgHc\nTTHrpmH984IpSao4T8ZKUsUZ9JJUcQa9JFWcQS9JFWfQS1LFGfSSVHEGvSRVnEEvSRX3/wEZcN4I\nLxlQngAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10fb574a8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "%matplotlib inline\n",
    "counter = Counter(nums_entities)\n",
    "keys = sorted(counter.keys())\n",
    "values = [counter[key] for key in keys]\n",
    "plt.plot(keys, np.cumsum(values)/sum(values))\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEACAYAAABI5zaHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGeVJREFUeJzt3XuYVNWZ7/HvC0YEQQQciYD3RElQgaA84o1GvKAnERNN\nRBKjPsbwGLzFxANh5kzaiXow4/1oDDgELwgS0QhGjBiHjvEKKipC040abo3ijOAgCNo07/ljFdA0\n3fStqtauXb/P89TTVdXb8ucS3l797rXXNndHRETSpU3sACIikn0q7iIiKaTiLiKSQiruIiIppOIu\nIpJCKu4iIinUaHE3s0lmtsbM3tnNMXeb2VIze8vM+mU3ooiINFdTZu6TgTMb+qaZnQUc7u5fB0YB\nv89SNhERaaFGi7u7vwis280hw4GHMse+BnQ2s+7ZiSciIi2RjZ57T2BlrddVmfdERCQSnVAVEUmh\nPbLwGVXAgbVe98q8twsz00Y2IiIt4O7WnOObWtwt86jPLGA0MN3Mjgc+dfc1uwnYnHxRlJaWUlpa\nGjtGo5Qzuwoh57hxpVxxRSnr1rHTY+3aHc83bICNG8Pj8893fb5+Pey7L/TqFR49e+543qsX9OgB\nHTvCXntB+/bh6x7NnAYWwlhC4eQ0a1ZdB5pQ3M1sKlACdDOzFcCvgT0Bd/eJ7j7bzM42s/eAjcCl\nzU4hImzaBJWVUFUFq1bteNR+vXEjPPggdOkCXbuGr7UfvXtDp06w997QoUP4Wvt5hw7QuTO0axf7\nv1ZyrdHi7u4jm3DMldmJI1I8Nm2CV1+FsrLweOMNOOSQnWfRJ5yw88z6zjvhhhsiB5eCkI2ee+qU\nlJTEjtAkyplduc5ZXzE/+mgoKYFx40Ih79Rp958xZEhuM2aL/p/HZ/nsgZuZF0LPXSRb1q+HP/8Z\nHn8cnnsOvvnNUMxLSuDEExsv5iIQeu7NPaGq4i6SZWvXwqxZoaD/7W9w8slw3nkwfDh06xY7nRQi\nFXeRCLZsgYoKeOmlUNBfeQWGDg0F/dvfDitTRFpDxV0kxzZtgoULYcGCHY933w3LBwcOhHPPhbPO\nCksJRbJFxV0ky7ZuDTPxadPCSdD334cjj4T+/Xc8+vaFffaJnVTSTMVdJEsWLoSpU0NR79ABRo4M\nM/KjjtIaccm/lhR3LYUUyfjHP0IxnzoVPvsMRoyAmTPhmGOgBRcIikSlmbsUvTfegOuug/JyOP/8\nMEs/4QRoo231JCE0cxdphs8+g3/91zBTHz8efvQj+MpXYqcSyQ7NTaQozZwJffrAp5/CokVw6aUq\n7JIumrlLUVm5Eq6+GhYvDhtwDRkSO5FIbmjmLkWhpgbuuissXezXD955R4Vd0k0zd0m9igr44Q/D\nhUUvvhi2xRVJO83cJdX+9jc45RS47DKYO1eFXYqHZu6SWg8/DL/8ZVgNM3Ro7DQi+aXiLqnjDqWl\nobjPnRu22RUpNirukipffBFaMO+9F/aE6d49diKRONRzl9T45BM4/fRQ4OfOVWGX4qbiLqmwdCkM\nGhS2DZg+Hdq3j51IJC4Vdyl4L78c7nZ0/fVhGwHtCSOijcOkwH38cdhPfdIkOPvs2GlEckP7uUtR\ncYfvfjesXR8/PnYakdzRrpBSVB58MOzBPn167CQiyaOZuxSk5cvh2GPh+efDzTRE0qwlM3edepKC\ns3UrXHJJuPpUhV2kfiruUnDuuguqq0NxF5H6qS0jBWXxYhg8GF59FQ4/PHYakfxQW0ZS7csv4aKL\n4OabVdhFGqPiLgXjxhvhq1+Fn/wkdhKR5NNSSCkIr70GEyfCggVgzfrlVKQ4aeYuiff556Edc889\ncMABsdOIFAadUJXEu+oqWLcOpkyJnUQkDl2hKqlTVgZPPgkLF8ZOIlJY1JaRxNq0CS6/HO69F/bd\nN3YakcKitowk1pgxsGyZ9o4Rydk6dz
MbZmZLzKzSzMbU8/1uZvaMmb1lZgvN7JLmhBCp68034YEH\n4O67YycRKUyNztzNrA1QCQwFVgPzgRHuvqTWMb8G9nL3X5nZfkAF0N3dt9T5LM3cpVHV1TBwIFx7\nLVx8cew0IvHlauY+EFjq7svdvRp4FBhe55iPgE6Z552AT+oWdpGmuu022H9/+PGPYycRKVxNWS3T\nE1hZ6/UqQsGv7X7geTNbDXQELshOPCk2lZVw663w+uu6WEmkNbK1FPJXwNvuPsTMDgeeM7Nj3H1D\n3QNLS0u3Py8pKaGkpCRLEaTQbd0aVsf8y7/AIYfETiMST1lZGWVlZa36jKb03I8HSt19WOb1WMDd\n/ZZax8wGbnL3lzKvnwfGuPvrdT5LPXdp0IQJMHkyvPQStG0bO41IcuSq5z4f+JqZHWxmewIjgFl1\njikHTsuE6A4cAXzQnCBS3Kqqwox90iQVdpFsaLQt4+41ZnYlMIfww2CSu5eb2ajwbZ8I/F9gspm9\nDRjwv919bS6DS3q4w89+BqNHQ58+sdOIpIMuYpLo/vhHuOGGsLa9XbvYaUSSpyVtGRV3ieqTT+Co\no+CJJ2DQoNhpRJJJxV0Kzs9/Hu6wdO+9sZOIJJeKuxSU1avDrH3x4nCHJRGpn4q7FJSrrgo99ltv\njZ1EJNlU3KVgrFwJ/fpBeXnYakBEGpazXSFFsu2mm8LVqCrsIrmhmbvk3bJlMGAAVFTAfvvFTiOS\nfJq5S0G48Ua44goVdpFc0j1UJa/efz/cE3Xp0thJRNJNM3fJq9/8JqyS6dIldhKRdNPMXfKmshKe\nfhreey92EpH008xd8ubf/i3cOq9z59hJRNJPq2UkL8rLYfDg0HPv1Knx40VkB62WkcQqLYVf/EKF\nXSRfNHOXnFu4EE4/PfTaO3aMnUak8GjmLolUWgrXX6/CLpJPmrlLTi1YAGefHXrtHTrETiNSmDRz\nl8S54QYYO1aFXSTfNHOXnFm4EM44Az74ANq3j51GpHBp5i6JcvPN4U5LKuwi+aeZu+TE0qVwwglh\n1q7ljyKto5m7JMb48TB6tAq7SCyauUvWrVgR7rL03nvQtWvsNCKFTzN3SYRbb4XLLlNhF4lJM3fJ\nqjVr4BvfgEWL4IADYqcRSQfN3CW6O+6ACy9UYReJTTN3yZp16+BrX4M334SDD46dRiQ9NHOXqO65\nB845R4VdJAk0c5es2LABDjsM/v53OPLI2GlE0kUzd4lmwgQYMkSFXSQpNHOXVtu8OczaZ88O69tF\nJLs0c5coJk+Gb31LhV0kSTRzl1aproYjjoCpU2HQoNhpRNJJM3fJu2nT4NBDVdhFkkYzd2mxrVuh\nT5+wBHLo0NhpRNIrZzN3MxtmZkvMrNLMxjRwTImZLTCzd81sbnNCSGF66qmw6+Opp8ZOIiJ1NTpz\nN7M2QCUwFFgNzAdGuPuSWsd0Bl4GznD3KjPbz93/u57P0sw9RU45JWzre8EFsZOIpFuuZu4DgaXu\nvtzdq4FHgeF1jhkJPO7uVQD1FXZJl/nzYflyOO+82ElEpD5NKe49gZW1Xq/KvFfbEUBXM5trZvPN\n7KJsBZRkuv12uOYa2GOP2ElEpD7Z+qu5B/At4FRgb+AVM3vF3d/L0udLgqxYAXPmhKtSRSSZmlLc\nq4CDar3ulXmvtlXAf7v7ZmCzmb0A9AV2Ke6lpaXbn5eUlFBSUtK8xBLd3XfDpZfCPvvETiKSTmVl\nZZSVlbXqM5pyQrUtUEE4ofohMA+40N3Lax3TG/h/wDCgHfAacIG7L67zWTqhWuDWrw/r2hcsgIMO\navx4EWm9lpxQbXTm7u41ZnYlMIfQo5/k7uVmNip82ye6+xIzexZ4B6gBJtYt7JIOkybBGWeosIsk\nnS5ikibbsiXcjOOxx+C442KnESke2n5AcuqJJ8KMXYVdJPlU3KVJ3OG22+C662InEZGmUHGXJnn5\nZV
i7Fr7zndhJRKQpVNylSW67Da69Ftq2jZ1ERJpCJ1SlUe+/D8cfD8uWwd57x04jUnx0QlVy4s47\n4fLLVdhFColm7rJb69bB4YfDu+9Cjx6x04gUJ83cJesmTAgnUVXYRQqLZu7SoC+/DFsNzJ4NffvG\nTiNSvDRzl6z64x/hG99QYRcpRCru0qApU+CnP42dQkRaQm0ZqdfataElU1UFHTvGTiNS3NSWkayZ\nORNOO02FXaRQqbhLvR57DL7//dgpRKSl1JaRXaxbBwcfHFoynTrFTiMiastIVsyaBUOHqrCLFDIV\nd9mFWjIihU9tGdnJ//wPHHggrFqlG2CLJIXaMtJqs2bBkCEq7CKFTsVddjJjBpx/fuwUItJaasvI\nduvXQ69esHIldO4cO42IbKO2jLTKU0/BKaeosIukgYq7bDdjhlbJiKSF2jICwGefQc+esHw5dOkS\nO42I1Ka2jLTY00/DSSepsIukhYq7ALpwSSRt1JYRNmwILZl//AO6do2dRkTqUltGWmT2bBg0SIVd\nJE1U3IXHHtOFSyJpo7ZMkdu4EXr0gPffh/32i51GROqjtow02zPPwMCBKuwiaaPiXuR04ZJIOqkt\nU8Q2bYIDDoDKSth//9hpRKQhastIszzzDAwYoMIukkYq7kVMLRmR9FJbpkht2BC2962ogO7dY6cR\nkd3JWVvGzIaZ2RIzqzSzMbs57jgzqzaz7zUnhOTf44/DySersIukVaPF3czaAPcAZwJ9gAvNrHcD\nx40Hns12SMm+Bx6ASy6JnUJEcqUpM/eBwFJ3X+7u1cCjwPB6jrsKmAF8nMV8kgPLlsHChfDtb8dO\nIiK50pTi3hNYWev1qsx725lZD+Bcd78PaFZfSPLvoYdgxAho1y52EhHJlT2y9Dl3ArV78SrwCeUO\nDz4I06fHTiIiudSU4l4FHFTrda/Me7UdCzxqZgbsB5xlZtXuPqvuh5WWlm5/XlJSQklJSTMjS2u8\n+CLstVdY3y4iyVRWVkZZWVmrPqPRpZBm1haoAIYCHwLzgAvdvbyB4ycDT7n7E/V8T0shI7vsMujd\nG66/PnYSEWmqliyFbHTm7u41ZnYlMIfQo5/k7uVmNip82yfW/UeaE0DyZ+NGeOIJWLw4dhIRyTVd\nxFREpkyBqVPDzTlEpHBobxnZLa1tFykemrkXiRUroH9/qKoKJ1RFpHBo5i4Nevhh+MEPVNhFikW2\n1rlLgm1b2/7ww7GTiEi+aOZeBF55Bdq2DbfTE5HioOJeBB54AC6+GEzXDYsUDZ1QTblNm6Bnz7BR\nWM+ejR8vIsmjE6qyiyefDO0YFXaR4qLinnJa2y5SnNSWSbFVq+CYY8La9vbtY6cRkZZSW0Z2MmVK\nuAG2CrtI8VFxTyl3tWREipmKe0rNmwdbt8Lxx8dOIiIxqLin1JQpcNFFWtsuUqx0QjWFqqvD0sdX\nX4XDDoudRkRaSydUBYA5c+DrX1dhFylmKu4pNGUK/OhHsVOISExqy6TM+vVw4IHwwQfQrVvsNCKS\nDWrLCH/6E5SUqLCLFDsV95RRS0ZEQG2ZVFm9Go46StsNiKSN2jJFbto0+O53VdhFRMU9VR55BH74\nw9gpRCQJVNxTYtEi+PhjGDw4dhIRSQIV95R45BEYOTLcK1VERCdUU2DrVjj0UHjqqbB/u4iki06o\nFqkXX4TOnVXYRWQHFfcU0Np2EalLbZkCt3lz2AHyrbfCtgMikj5qyxSh2bOhb18VdhHZmYp7gVNL\nRkTqo7ZMAVu7NqySWbEinFAVkXRSW6bIzJgBZ5yhwi4iu1JxL2BqyYhIQ9SWKVDLlsGxx4adIPfc\nM3YaEckltWWKyCOPwPe/r8IuIvVrUnE3s2FmtsTMKs1sTD3fH2lmb2ceL5rZ0dmPKtts2gT33guj\nRsVOIiJJ1WhxN7M2wD3AmUAf4EIz613nsA+AU9y9L3AjcH+2g8oO
998PAwdCv36xk4hIUu3RhGMG\nAkvdfTmAmT0KDAeWbDvA3V+tdfyrQM9shpQdNm+G3/4WZs6MnUREkqwpbZmewMpar1ex++L9E+CZ\n1oSShv3hD9C/PwwYEDuJiCRZU2buTWZmQ4BLgZMaOqa0tHT785KSEkpKSrIZIdW++ALGjw/r20Uk\nvcrKyigrK2vVZzS6FNLMjgdK3X1Y5vVYwN39ljrHHQM8Dgxz9/cb+CwthWyFCRPgySfhGf1eJFJU\nWrIUsinFvS1QAQwFPgTmARe6e3mtYw4CngcuqtN/r/tZKu4t9OWXcMQR4SbYgwbFTiMi+dSS4t5o\nW8bda8zsSmAOoUc/yd3LzWxU+LZPBP4P0BX4nZkZUO3uA5v/nyANeeihUNxV2EWkKXSFagGoroYj\njwwF/qQGz2aISFrpCtWUmjIl7P6owi4iTaWZe8Jt2QK9e8OkSTB4cOw0IhKDZu4pNG1auI2eCruI\nNIdm7glWUwPf/Cbcdx+cemrsNCISi2buKTN9OvzTP8GQIbGTiEih0cw9oWpq4Oij4c47w92WRKR4\naeaeIjNmhNvnnX567CQiUoiyureMZEdNDfzmN/Dv/w7WrJ/VIiKBZu4JdN990K0bDBsWO4mIFCr1\n3BNmzRo46igoK4M+fWKnEZEkyMnGYdmk4t64H/8YuncPLRkREcjRxmGSPy+8AHPnQnl548eKiOyO\neu4JUV0NP/sZ3H47dOwYO42IFDoV94S4+27o0QPOPz92EhFJA/XcE6CqCvr2hZdfDnu2i4jUpouY\nCtR118EVV6iwi0j26IRqZH/9K8ybB5Mnx04iImmimXtEX3wBo0eHfnuHDrHTiEiaqLhHdNtt4fZ5\n3/lO7CQikjY6oRrJsmVw7LEwf364hZ6ISEN0QrWAXHtteKiwi0gu6IRqBNOnw+LF4auISC6ouOfZ\n3/8OV10Fzz4L7drFTiMiaaW2TB4tWhSuQJ06Ffr3j51GRNJMxT1PVq6Es86CO+6A006LnUZE0k7F\nPQ/WrQs33rj6ahg5MnYaESkGWgqZY5s3hxtcDxgQdnzUbfNEpLl0s46EqamBH/wAvvKV0Gdvo9+T\nRKQFdLOOBHGHa64JLZlnnlFhF5H8UnHPkfHjw7LHF17QkkcRyT8V9yzbuhV+9zuYMCHsz965c+xE\nIlKMVNyzxB2eew5+9avQgnn22XBnJRGRGFTcs2DePBg7NtxR6aab4LzztCpGROLSab5WqKgIV5x+\n73swYgS8+254rcIuIrGpuLdAVRVcfjmcdBIcdxxUVsJPfxqWPIqIJEGTiruZDTOzJWZWaWZjGjjm\nbjNbamZvmVm/7MaMr6YG5swJV5gefTR07RqK+pgxuouSiCRPo8XdzNoA9wBnAn2AC82sd51jzgIO\nd/evA6OA3+cga96UlZVtf15RAePGwcEHwz//M5x4IixdCrfcAl26xMsIO+dMMuXMnkLICMqZBE2Z\nuQ8Elrr7cnevBh4Fhtc5ZjjwEIC7vwZ0NrPuWU2aR3/5SxkTJ8IJJ8DgwfDll/CXv4S7Jo0eDd26\nxU4YFMofTOXMnkLICMqZBE1ZLdMTWFnr9SpCwd/dMVWZ99a0Kl0OuMOnn8Lq1eHx4Yc7f129Gl5/\nPdzXdNw4OPNM9dJFpPCkailkeTmcey5s2RIe1dU7nm97XV0Ne+8d1qD36AEHHBC+HnpomKn36AF/\n/nNou4iIFKpGNw4zs+OBUncflnk9FnB3v6XWMb8H5rr79MzrJcBgd19T57OKZ9cwEZEsysXGYfOB\nr5nZwcCHwAjgwjrHzAJGA9MzPww+rVvYWxJORERaptHi7u41ZnYlMIdwAnaSu5eb2ajwbZ/o7rPN\n7Gwzew/YCFya29giIrI7ed3PXURE8iNvV6g25UKoJDCzZWb2tpktMLN5sfNsY2aTzGyNmb1T670u\nZjbHzCrM7Fkzi74HZQM5f21m
q8zszcxjWOSMvczsP81skZktNLOrM+8najzryXlV5v2kjWc7M3st\n83dmkZndnHk/aePZUM5EjWcmU5tMllmZ180ey7zM3DMXQlUCQ4HVhD7+CHdfkvN/eTOZ2QfAAHdf\nFztLbWZ2ErABeMjdj8m8dwvwibv/NvMDs4u7j01gzl8Dn7n77TGzbWNmXwW+6u5vmVlH4A3CtRqX\nkqDx3E3OC0jQeAKYWQd3/9zM2gIvAb8AziFB47mbnKeRvPH8OTAA2Mfdz2nJ3/V8zdybciFUUhgJ\n3HPH3V8E6v7AGQ48mHn+IHBuXkPVo4GcEMY1Edz9I3d/K/N8A1AO9CJh49lAzp6ZbydmPAHc/fPM\n03aEvz/rSNh4QoM5IUHjaWa9gLOB/6j1drPHMl9FrL4LoXo2cGxsDjxnZvPN7PLYYRqx/7ZVSe7+\nEbB/5Dy7c2Vm36H/iP3reW1mdgjQD3gV6J7U8ayV87XMW4kaz0wbYQHwEVDm7otJ4Hg2kBOSNZ53\nANcTatE2zR7LxM1QE+BEd/8W4Sfn6EyboVAk9ez474DD3L0f4S9VIn79zbQ6ZgDXZGbGdccvEeNZ\nT87Ejae7b3X3/oTfgE42sxISOJ51cp5iZoNJ0Hia2f8C1mR+Y9vdbxONjmW+insVcFCt170y7yWO\nu3+Y+fpfwJ/YdauFJFmzbQ+fTH/248h56uXu/+U7Tu7cDxwXMw+Ame1BKJgPu/vMzNuJG8/6ciZx\nPLdx9/XAbOBYEjie22RyPg0cm7DxPBE4J3Pubxpwqpk9DHzU3LHMV3HffiGUme1JuBBqVp7+3U1m\nZh0ysyTMbG/gDODduKl2Yuz803wWcEnm+cXAzLr/QCQ75cz8YdzmeyRjTP8ALHb3u2q9l8Tx3CVn\n0sbTzPbb1sows/bA6cACEjaeDeR8K0nj6e7j3P0gdz+MUCf/090vAp6iuWPp7nl5AMOACmApMDZf\n/95mZjwUeIvwB3NhknICUwkrjb4AVhBWdnQB/poZ1znAvgnN+RDwTmZsnyT0D2NmPBGoqfX/+s3M\nn8+uSRrP3eRM2ngencm2AHgb+GXm/aSNZ0M5EzWetfIOBma1dCx1EZOISArphKqISAqpuIuIpJCK\nu4hICqm4i4ikkIq7iEgKqbiLiKSQiruISAqpuIuIpND/BxAm8YgYK10ZAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10e922668>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "%matplotlib inline\n",
    "counter = Counter(nums_ques_words)\n",
    "keys = sorted(counter.keys())\n",
    "values = [counter[key] for key in keys]\n",
    "plt.plot(keys, np.cumsum(values)/sum(values))\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
